20220506_10x_RZB

Lung ILCs (immune cells with partly residual T/B which need be kept in this version)
loading 50k cells, demo only get 13,474
(to increase datasize could get some more )

0604: plus data called 20k more in cellranger, total 35,367 cells
(as BAS/EOS/NEU relatively get much lower nGene comparing with ILCs/Macrophages)
(to get much better resolution of those granulocytes, may should increase datasize by 1x to 3x)

20240407
rerun initial process with currently modified workflow for the two-year-ago data
separate LUNG and BALF after preAnno

individual LUNG data plot several figures

20240705
add Stab2+ ILC2.2 vs ILC2.1 (CTLonly)

source("/Shared_win/projects/RNA_normal/analysis.10x.r")

load processed obj

GEX.seur <- readRDS("./sc10x_RZB.preAnno.rds")
GEX.seur
## An object of class Seurat 
## 18826 features across 21447 samples within 1 assay 
## Active assay: RNA (18826 features, 1265 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap

process and subset

GEX.seur <- subset(GEX.seur, subset = preAnno2 != "MIX")
GEX.seur
## An object of class Seurat 
## 18826 features across 21306 samples within 1 assay 
## Active assay: RNA (18826 features, 1265 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
GEX.seur <- subset(GEX.seur, subset = cnt %in% c("LUNG.CTL","LUNG.CKO"))
GEX.seur
## An object of class Seurat 
## 18826 features across 10837 samples within 1 assay 
## Active assay: RNA (18826 features, 1265 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
#
GEX.seur$preAnno1 <- factor(as.character(GEX.seur$preAnno1),
                            levels = c("Bcell","Plasma",
                                       "CD8T","CD4T","Treg","Tin",
                                       "NK1","NK2",
                                       "ILC2.CC","ILC2.1","ILC2.2",
                                       "BAS","EOS",
                                       paste0("NEU",1:5),
                                       "pDC","cDC","migDC",
                                       "Mono1","Mono2","IM1","IM2","IM3",
                                       "AM.CC","AM1","AM2","AM3"))

GEX.seur$preAnno2 <- factor(as.character(GEX.seur$preAnno2),
                            levels = c("Bcell","Tcell",
                                       "NK",
                                       "ILC2",
                                       "BAS","EOS",
                                       "NEU",
                                       "pDC","cDC","migDC",
                                       "Monocyte","IM",
                                       "AM"))

GEX.seur$rep <- paste0("rep",
                        gsub("LUNG.CTL|LUNG.CKO|BALF.CTL|BALF.CKO","",as.character(GEX.seur$FB.info)))


GEX.seur$FB.info <- factor(as.character(GEX.seur$FB.info),
                           levels = c("LUNG.CTL1","LUNG.CTL2",
                                      "LUNG.CKO1","LUNG.CKO2"))

#
color.FB <- c(ggsci::pal_d3("category20c")(20)[c(1,6,2,7)],
              ggsci::pal_d3("category20b")(20)[c(7,12,13,18)])


color.cnt <- color.FB[c(1,3,5,7)]   
color.cnt <- color.cnt[1:2]

#
color.pre1 <- c(scales::hue_pal()(32),"grey")
color.pre2 <- c(ggsci::pal_igv("default")(40)[c(2,3,6,7,8,10,12,22,18:19,38,21,30)])
pumap1 <- DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "preAnno2", cols = color.pre2) +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)
pumap1

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        ncol = 2,
        pt.size = 0, group.by = "preAnno2", cols = color.pre2) &
  geom_jitter(alpha=0.15, shape=16, width = 0.3 ,size = 0.12)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        ncol = 2,
        pt.size = 0, group.by = "preAnno2", cols = color.pre2)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        ncol = 2,
        pt.size = 0, group.by = "preAnno2", split.by = "FB.info", cols = color.FB[c(1,2,3,4)])
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.

##
# if want much cleaner data, here could do advanced filtering celltype-individually
#
# actually some main celltypes like Bcell/Plasma, Tcell, ILCs 
#          are mixed with their cycling part,
#          so might should consider at preAnno1 level
#
# here the whole distribution seems just not very bad, let's keep it 
##

new

re-clustering

PCA

GEX.seur <- FindVariableFeatures(GEX.seur, selection.method = "vst", nfeatures = 1500)

# Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(GEX.seur), 10)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(GEX.seur)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
plot1 + plot2

# exclude MT genes  and more 
            
DIG <- grep("^mt-|^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|Mzb1|Vpreb|Lars2|Jun|Fos|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^AC|^AA|^AW|^BC|^Gm|^Hist|Rik$|-ps",
            rownames(GEX.seur),value = T)
CC_gene <- Hmisc::capitalize(tolower(as.vector(unlist(cc.genes.updated.2019))))


VariableFeatures(GEX.seur) <- setdiff(VariableFeatures(object = GEX.seur),
                                                c(#MT_gene,
                                                  DIG, 
                                                  CC_gene) )

GEX.seur <- RunPCA(GEX.seur, features = VariableFeatures(GEX.seur))
## PC_ 1 
## Positive:  Pclaf, Dut, Snrpf, Ppp1r14b, Npm1, Ptma, Selenoh, Hlf, Ran, Lig1 
##     Gata3, Ranbp1, Mcm3, Anp32b, Nap1l1, Ldha, Nme1, Tuba1b, Lgals1, Tubb5 
##     Pdcd1, Eef1g, Snrpd1, Nucks1, Smc2, Set, Mif, Hnrnpab, Ramp1, Krtcap2 
## Negative:  S100a9, G0s2, S100a8, Clec4e, Acod1, Mmp9, Cd9, Retnlg, Marcksl1, Cxcl2 
##     Lrg1, Rdh12, Egr1, Isg15, Csf1, Ifitm1, Ccrl2, Stfa2l1, Cd24a, Ptgs2 
##     Asprv1, Cstdc4, Lcn2, Bcl2a1b, Il1f9, Wfdc17, Mmp8, Wfdc21, Ctsd, Ccl4 
## PC_ 2 
## Positive:  Rora, Gata3, Itk, Sh2d2a, Ctla2a, Ptprcap, Skap1, Hlf, Icos, Pdcd1 
##     Il1rl1, Ar, Ikzf3, Nkg7, Il2ra, Itgb7, Ptpn13, Pcsk1, Ccl5, Dkk3 
##     Cxcr6, Plod2, Hs3st1, Klrg1, Gzma, Tnfrsf18, Ctla4, Thy1, Ctla2b, Prf1 
## Negative:  Ccl9, Ctss, Psap, Mt1, Ifi30, H2-DMa, Lyz2, Ms4a6d, Fn1, Tnip3 
##     Cybb, Gatm, Dab2, Mafb, Myof, Lrp1, Ctsz, H2-DMb1, Cd74, App 
##     Anxa5, Lgals3, Fcgrt, H2-Ab1, Anxa4, H2-Aa, Naaa, Ccl24, Cd36, Mrc1 
## PC_ 3 
## Positive:  Ctsd, Cd9, Chil3, Atp6v0d2, Ctsk, Slc7a2, Mgll, Cxcl2, Csf1, S100a9 
##     Ftl1, Car4, Inhba, Fpr2, S100a8, F7, G0s2, Plet1, Cstb, Il1a 
##     Il11ra1, Olr1, Vat1, Myo1e, Fabp5, Ly75, Dst, Cxcl3, Krt79, Clec4n 
## Negative:  Mef2c, Tcf4, H2-Ab1, Rnase6, H2-Aa, Cd74, Siglech, Ms4a6c, Lifr, H2-Eb1 
##     H2-DMa, Cadm1, Ms4a4c, Spib, Klk1, Bcl11a, Napsa, Cbfa2t3, H2-DMb1, Ccr2 
##     Aif1, Irf8, Ccr9, Cox6a2, Kmo, Pltp, Plac8, Ly6c2, Sdc4, S100a4 
## PC_ 4 
## Positive:  S100a4, Ms4a6c, Tmem176b, Mafb, Aif1, Ms4a4c, Tmem176a, Plbd1, Ifitm3, Hlf 
##     Apoe, Pcsk1, F13a1, Stab2, Ifitm6, H2-DMb1, Il1rl1, Ace, Ccl24, Areg 
##     Ramp3, Pxdc1, Ccr2, Pdcd1, Calca, H2-Eb1, Hs3st1, Gata3, Adgre4, H2-DMa 
## Negative:  Irf8, Siglech, Klk1, Atp1b1, Cox6a2, Gzma, Spib, Nkg7, Tcf4, Prf1 
##     Ccl5, Cadm1, Ccr9, Klk1b27, Serpinb9, Gzmb, Cd7, Pacsin1, Ncr1, Eomes 
##     Mef2c, Ccnd2, Sh3bgr, Bcl11a, P2ry14, Tmem108, Serpinb6b, Cyb561a3, Blnk, Cd200 
## PC_ 5 
## Positive:  Siglech, Spib, Klk1, Cox6a2, Ccr9, Cadm1, Bcl11a, Klk1b27, S100a9, G0s2 
##     S100a8, Pacsin1, Rnase6, Sh3bgr, Kmo, Tcf4, Mef2c, Ftl1, Fcrla, Smim5 
##     Lifr, Pou2af1, Plac8, Cyb561a3, Cd79b, Bst2, Pdzd4, Cd79a, Hmgn3, Pclaf 
## Negative:  Nkg7, Ccl5, Gzma, Prf1, Serpinb9, Gzmb, Ncr1, Eomes, Serpinb6b, Irf8 
##     Serpinb9b, Sh2d2a, Nr4a2, Cma1, Klra3, Klri2, Klra7, Lgals1, H2afz, Itk 
##     Ccnd2, Rora, Rgs1, Car2, Klrg1, Gem, Ctla2a, Tnfrsf9, Sulf2, Ptma
length(setdiff(VariableFeatures(object = GEX.seur),
                                                c(DIG,CC_gene)))
## [1] 1267
setdiff(VariableFeatures(object = GEX.seur),
                                                c(DIG,CC_gene))[1:300]
##   [1] "Ccl22"     "Retnla"    "Ccl17"     "Aldh1a2"   "Hmox1"     "Mfge8"    
##   [7] "Ngp"       "Spp1"      "Il13"      "Fscn1"     "Il12b"     "Mucl1"    
##  [13] "Ccl8"      "Camp"      "Areg"      "Ltf"       "C1qa"      "Pf4"      
##  [19] "Fkbp11"    "Ly6d"      "Ccl1"      "Pou2af1"   "Ccl2"      "Cd209e"   
##  [25] "Ccl7"      "Calca"     "Retnlg"    "Mmp12"     "Cxcl10"    "Csf2"     
##  [31] "Klk1"      "Cd209d"    "Ctsk"      "C1qb"      "Tpsab1"    "Tph1"     
##  [37] "Ms4a1"     "Rsad2"     "Lcn2"      "Timp1"     "Cpa3"      "Ccr7"     
##  [43] "Clu"       "Ldoc1"     "Siglech"   "C1qc"      "Ccl24"     "Inhba"    
##  [49] "Cd209a"    "Fabp4"     "Cxcl3"     "Mgl2"      "Ch25h"     "Apoe"     
##  [55] "Slpi"      "Il6"       "Derl3"     "Il4i1"     "Cd79a"     "Krt79"    
##  [61] "Arg1"      "Cstdc5"    "Il5"       "H2-M2"     "Lyz1"      "Ccr9"     
##  [67] "Mmp13"     "Car4"      "Cst3"      "Il10"      "Ly6a"      "Edn1"     
##  [73] "Il17a"     "Saa3"      "Plet1"     "Zmynd15"   "Cxcl1"     "Cox6a2"   
##  [79] "Gata2"     "Cstdc4"    "Ccl4"      "Akap12"    "Xcr1"      "Fbp1"     
##  [85] "Spata7"    "Cd163l1"   "Lpl"       "Tbc1d4"    "Il2ra"     "Ckb"      
##  [91] "Gzmb"      "Chil3"     "Tnf"       "Cd207"     "Tff1"      "Serpina3g"
##  [97] "Cxcl9"     "Marco"     "Txndc5"    "Sept3"     "Tnfrsf17"  "Rbp4"     
## [103] "Olfm4"     "Asic3"     "Slc7a2"    "Mcpt8"     "Alox15"    "Eaf2"     
## [109] "Ccl3"      "Gpnmb"     "Dcstamp"   "Ctla2a"    "S100a8"    "Scgb1a1"  
## [115] "Gzmc"      "Atp6v0d2"  "Il17f"     "Ear6"      "Cma1"      "Ccl6"     
## [121] "Timd4"     "Mt3"       "Ccdc80"    "Mt2"       "Scin"      "Rnase2a"  
## [127] "Cd79b"     "Ear1"      "Cacnb3"    "Itgae"     "Klk4"      "Syt7"     
## [133] "Mrc1"      "Penk"      "Wfdc21"    "Mgll"      "Pcp4"      "Creld2"   
## [139] "Ifit1"     "Il1a"      "Cd36"      "Il2"       "Pclaf"     "Igfbp4"   
## [145] "Nudt17"    "Pcsk1"     "Ctla4"     "Ebf1"      "Hpgd"      "Serpinb2" 
## [151] "Pkib"      "Cfb"       "Fn1"       "Tcf4"      "Hal"       "Ifit2"    
## [157] "Mmp19"     "Cyp11a1"   "Fabp5"     "Ccl12"     "Fcrls"     "Fam71f2"  
## [163] "Prdx1"     "Ifitm1"    "Igf1"      "Ear2"      "Klk1b11"   "Stfa2"    
## [169] "Tnfrsf4"   "Ereg"      "Rep15"     "Ppbp"      "Xcl1"      "Hapln3"   
## [175] "Tcaf2"     "Il4"       "S100a1"    "Hs3st1"    "Olr1"      "Thbs1"    
## [181] "Fcer1a"    "Abcg1"     "Il1rl1"    "Lmo7"      "F7"        "Apod"     
## [187] "Ctsd"      "Zcwpw1"    "Ctsl"      "Dut"       "Hlf"       "Mmp10"    
## [193] "Tnfsf8"    "Mt1"       "Prg2"      "Pdia4"     "Ly6e"      "Fpr2"     
## [199] "Spib"      "Isg15"     "Cyp7b1"    "Dnase1l3"  "Cdh1"      "Ms4a4c"   
## [205] "Adig"      "Cbr2"      "Cd40"      "Rrad"      "Ly6g"      "F13a1"    
## [211] "Palld"     "H2-Eb1"    "Scd1"      "Lipc"      "Bcl11a"    "Wfdc2"    
## [217] "Slc1a2"    "Ccl9"      "Ly6c2"     "Cd63"      "Cybb"      "Ckap4"    
## [223] "Ifitm6"    "Pdcd1"     "Gata3"     "Flrt3"     "Stab2"     "Ifi44"    
## [229] "Itgax"     "Wdfy4"     "Cxcl16"    "Stfa2l1"   "Bank1"     "Mmp25"    
## [235] "Sdc4"      "Clec4n"    "Cmpk2"     "Krt18"     "Ms4a8a"    "Gfod2"    
## [241] "Naaa"      "Mef2c"     "Rgcc"      "Bcl2l14"   "Clec10a"   "Gatm"     
## [247] "Ctsg"      "Ikzf2"     "Olfr889"   "Prc1"      "Ptcra"     "Serpine1" 
## [253] "Tubb5"     "Pbld1"     "Errfi1"    "Cacna1s"   "Sgk1"      "Ocstamp"  
## [259] "H2-Ab1"    "Dqx1"      "S100a9"    "Egr4"      "Tnfsf11"   "Tnfrsf13c"
## [265] "Selenop"   "Mmp8"      "Fabp1"     "Gdf15"     "Lipa"      "Klk1b27"  
## [271] "Nrg1"      "Tgm2"      "Mafb"      "Ccna2"     "Epcam"     "Timp3"    
## [277] "Selenoh"   "Bcl2a1d"   "Cd209b"    "H2-Aa"     "Cxcl15"    "Blvrb"    
## [283] "Cxcr6"     "Plppr4"    "Slc18a2"   "Pdpn"      "Nrp2"      "Vat1"     
## [289] "Chl1"      "Ccl5"      "Il11ra1"   "Cspg4"     "Apoc2"     "Chchd10"  
## [295] "Cd163"     "Asprv1"    "H2afx"     "Anxa4"     "Tmem176a"  "Hes1"
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "orig.ident") +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "orig.ident")

DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "FB.info", cols = color.FB) +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "FB.info", cols = color.FB)

DimHeatmap(GEX.seur, dims = 1:12, cells = 1500, balanced = TRUE,ncol = 4)

decide PCs to use
ElbowPlot(GEX.seur,ndims = 50)

PCs <- 1:24

GEX.seur <- FindNeighbors(GEX.seur, dims = PCs, k.param = 20)
## Computing nearest neighbor graph
## Computing SNN
GEX.seur <- FindClusters(GEX.seur, method = 'igraph', resolution = 2.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 10837
## Number of edges: 444509
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8147
## Number of communities: 33
## Elapsed time: 1 seconds

Run UMAP/tSNE

GEX.seur <- RunTSNE(GEX.seur, dims=PCs, complexity = 100)
GEX.seur <- RunUMAP(GEX.seur, dims=PCs, n.neighbors = 20, seed.use = 125)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 15:23:58 UMAP embedding parameters a = 0.9922 b = 1.112
## 15:23:58 Read 10837 rows and found 24 numeric columns
## 15:23:58 Using Annoy for neighbor search, n_neighbors = 20
## 15:23:58 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 15:24:00 Writing NN index file to temp file C:\Users\Shaorui\AppData\Local\Temp\Rtmp8IZHPY\file60cc298c291e
## 15:24:00 Searching Annoy index using 1 thread, search_k = 2000
## 15:24:02 Annoy recall = 100%
## 15:24:02 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 20
## 15:24:03 Initializing from normalized Laplacian + noise (using irlba)
## 15:24:04 Commencing optimization for 200 epochs, with 318868 positive edges
## 15:24:15 Optimization finished
#saveRDS(GEX.seur,"./sc10x_RZB.LUNG_test.rds")
DimPlot(GEX.seur, reduction = "tsne", label = T) + DimPlot(GEX.seur, reduction = "umap", label = T)

DimPlot(GEX.seur, reduction = "tsne", label = T, group.by = "preAnno2", cols = color.pre2) + 
  DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "preAnno2", cols = color.pre2)

DimPlot(GEX.seur, reduction = "tsne", label = T, label.size = 2.5, group.by = "preAnno1", cols = color.pre1) + 
  DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "preAnno1", cols = color.pre1)

GEX.seur$sort_clusters <- factor(GEX.seur$seurat_clusters,
                                 levels = c(26,28,    # B Plasma
                                            11,13,24,   # Tcell
                                            12,29,0,5,32,   # NK
                                            7,22,16,19,    # ILC2
                                            31,    # BAS
                                            18,    # EOS
                                            1,2,6,8,9,   # NEU
                                            17,  # pDC
                                            30,25,    # cDC
                                            23,   # migDC
                                               
                                            14,20,3,  # Mono
                                            15,10,21,   # IM
                                            27,4     # AM
                                            ))
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        ncol = 2,
        pt.size = 0, group.by = "sort_clusters") &
  geom_jitter(alpha=0.15, shape=16, width = 0.3 ,size = 0.12)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        ncol = 2,
        pt.size = 0, group.by = "sort_clusters")

check.markers <- c("Ptprc","Mki67",
                   "Cd19","Cd79a","Pax5",
                   "Sdc1","Jchain","Cd81",
                   "Cd3d","Cd8a","Cd8b1",
                   "Tcf7","Icos","Cd4","Foxp3",
                   "Klrb1c","Ncr1","Eomes","Nkg7",
                   "Gata3","Il1rl1","Ramp1","Il5",
                   "Areg","Il13","Calca",
                   "Csf2","Il6","Gata2","Cd200r3",
                   "Siglecf","Ear6","Ccr3",
                   "Itgam","Cd14","S100a8","S100a9",
                   "Mmp9",
                   "Siglech","Ccr9",
                   "Ly6c1","Ly6c2",
                   "P2ry14","Itgae","Xcr1","Clec9a",
                   "Cd209a","Clec10a",
                   "Fscn1","Ccr7","Calcb",
                   "Lyz2","Cx3cr1",
                   "Aif1","Csf1r","H2-DMa",
                   "H2-Aa","H2-Eb1",
                   "Mafb","Itgax","Mrc1","F7",
                   "Krt79","Car4","Il18",
                   "Chil3"
                   )
check.markers %in% rownames(GEX.seur)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE
pp.marker.sort <- DotPlot(GEX.seur, features = check.markers, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))+ scale_y_discrete(limits=rev)
pp.marker.sort

FeaturePlot(GEX.seur, features = c("Cd19","Cd3d","Cd8b1","Cd4",
                                   "Foxp3","Cxcr6","Il17re","Ccr6",
                                   "Cd81","Cx3cr1","Ly6c2","Mrc1"),
            ncol = 4)

# remove NK-NEU doublet C32
GEX.seur <- subset(GEX.seur, subset = seurat_clusters != c(32))
GEX.seur
## An object of class Seurat 
## 18826 features across 10800 samples within 1 assay 
## Active assay: RNA (18826 features, 1267 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") + 
  DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "preAnno2", cols = color.pre2)

GEX.seur$sort_clusters <- factor(as.character(GEX.seur$seurat_clusters),
                                 levels = c(26,28,    # B Plasma
                                            11,13,24,   # Tcell
                                            12,29,0,5,   # NK
                                            7,22,16,19,    # ILC2
                                            31,    # BAS
                                            18,    # EOS
                                            1,6,2,9,8,   # NEU
                                            17,  # pDC
                                            30,25,    # cDC
                                            23,   # migDC
                                            14,3,20,  # Mono
                                            15,10,21,   # IM
                                            27,4     # AM
                                            ))

find markers

# find markers for every cluster compared to all remaining cells, report only the positive ones
Idents(GEX.seur) <- "sort_clusters"

#GEX.markers.pre <- FindAllMarkers(GEX.seur, only.pos = TRUE, min.pct = 0.05,
#                                  test.use = "MAST",
#                                  logfc.threshold = 0.25)
GEX.markers.pre <- read.table("sc10x_RZB.markers.LUNG_sort0409.csv", header = TRUE, sep = ",")
#GEX.markers.pre %>% group_by(cluster) %>% top_n(n = 8, wt = avg_log2FC)
GEX.markers.pre$cluster <- factor(as.character(GEX.markers.pre$cluster),
                          levels = levels(GEX.seur$sort_clusters))

markers.pre_t60 <- (GEX.markers.pre %>% group_by(cluster) %>% 
                  filter(pct.1>0.05 & gene %in% grep("Rps|Rpl|mt-",GEX.markers.pre$gene,invert = T,value = T)) %>%
                   top_n(n = 60, wt = avg_log2FC) %>%
                    filter(p_val_adj < 0.01) %>%
                   ungroup() %>%
  arrange(desc(avg_log2FC*pct.1),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene

markers.pre_t120 <- (GEX.markers.pre %>% group_by(cluster) %>% 
                  filter(pct.1>0.05 & gene %in% grep("Rps|Rpl|mt-",GEX.markers.pre$gene,invert = T,value = T)) %>%
                   top_n(n = 120, wt = avg_log2FC) %>%
                    filter(p_val_adj < 0.01) %>%
                   ungroup() %>%
  arrange(desc(avg_log2FC*pct.1),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene
ttt = 997
ttt/60
## [1] 16.61667
ttt/64
## [1] 15.57812
ttt/65
## [1] 15.33846
pp.t60 <- list()

for(i in 1:17){
pp.t60[[i]] <- DotPlot(GEX.seur, features = rev(markers.pre_t60[(60*i-59):(60*i)]))  + coord_flip() + 
           theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
}

pp.t60
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

## 
## [[17]]

ttt = 1781
ttt/60
## [1] 29.68333
ttt/64
## [1] 27.82812
ttt/65
## [1] 27.4
pp.t120 <- list()

for(i in 1:30){
pp.t120[[i]] <- DotPlot(GEX.seur, features = rev(markers.pre_t120[(60*i-59):(60*i)]))  + coord_flip() + 
           theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
}

#pp.t120
check.markers1 <- c("Ptprc","Mki67","Top2a",
                   "Pax5","Ebf1","Cd19",
                   "Cd81","Sdc1","Jchain",
                   "Cd3d","Cd8a","Cd8b1",
                   "Tcf7","Icos","Cd4","Foxp3",
                   "Klrb1c","Ncr1","Eomes","Nkg7",
                   "Ccl5","Gzma","Gzmb",
                   "Gata3","Il1rl1","Ramp1","Areg",
                   "Klrg1","Il13","Il5","Calca",
                   "Csf2","Stab2",
                   
                   "Il6","Gata2","Cd200r3",
                   "Serpinb2","Siglecf","Ear6","Ccr3",
                   "Adgre1","Cd24a","Retnlg",
                   "Itgam","Cd14","S100a8","S100a9",
                   "Csf3r","Cxcr2","Mmp9","Hp",
                   "Mmp8","Ly6g","Csf1","C3",
                   "Il1a","Icam1","Maff","Tnf",
                   
                   "Siglech","Ccr9",
                   "Ly6c1","Ly6c2",
                   "P2ry14","Itgae","Xcr1","Clec9a",
                   "H2-Aa","H2-Eb1","H2-DMa",
                   "Cd209a","Clec10a",
                   "Cd86","Ccl17","Ccl22","Ccr7","Fscn1",
                   "Calcb",
                   
                   
                   "Lyz2","Mafb","Plac8",
                   "F13a1",
                   "Ms4a4c","Ahr",
                   
                   "Aif1","Csf1r","Gpr141",
                   
                   "Cx3cr1","Ace","Adgre4",
                   "Lilra5","Gngt2",
                   
                   "Retnla",
                   
                   "Fcrls","Hr",
                   "C1qa","C1qc",
                   "Igf1","Ccl7","Il11ra1",
                   
                   "Mmp12",
                   "Itgax","Mrc1","Dab2","F7",
                   "Car4","Mgll","Krt79","Il18",
                   "Chil3"
                   )


pp.marker.sort1 <- DotPlot(GEX.seur, features = check.markers1, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))+ scale_y_discrete(limits=rev)
pp.marker.sort1

table(GEX.seur@meta.data[,c("sort_clusters","FB.info")])
##              FB.info
## sort_clusters LUNG.CTL1 LUNG.CTL2 LUNG.CKO1 LUNG.CKO2
##            26        28        34        39        34
##            28        16        34        43        23
##            11        45        61       142       185
##            13        72        49       173        86
##            24        23        45        33        49
##            12       115       106        92       114
##            29        23        27        21        18
##            0        227       189       158       215
##            5        133       136       120       114
##            7        193       150        65        84
##            22        74        31        35        19
##            16       106        84        57        71
##            19        83        36        42        60
##            31        10        13         7        12
##            18        46        68        55        70
##            1        168       213       209       188
##            6        111       121       140       127
##            2        189       146       203       222
##            9         72        90       156       130
##            8         74        92       202        90
##            17        60        66        46        91
##            30        21        28        11        23
##            25        36        39        33        39
##            23        57        29        32        35
##            14        66        76        94       113
##            3         92       159       128       175
##            20        42        34        62        59
##            15       100        97        52        80
##            10       106       100       131       104
##            21        54        44        51        46
##            27        50        28        29        18
##            4        207       102       111       108

Anno

#### Anno1
##
# Bcell         26          Pax5+Ebf1+Cd19+Cd79a+
# Plasma        28          Sdc1+Jchain+Cd19(lo)Cd79a(lo)     Mki67+cycling      Cd81+ (also some in ILC2, cDC1, IM1, AM)
# 
# Tcell      Cd3d+
# CD8T          11          Cd8a+Cd8b1+
# Th/Tin        13          should be mix of Th(left)/Tin(up, innate like iNK MAIT, gdT)/NKT(right)    partly  Cd4+
# Treg          24          Cd4+Foxp3+       partly 'ILC2 markers' +
#
# NK1/2         12,   29,0,5             Klrb1c+Ncr1+Eomes+   Nkg7/Ccl5+(some in Tcell)
#                                   NK1   Gzma(lo)Gzmb-   NK2    Gzma+Gzmb+
#               
# ILC2                 Gata3+Il1rl1+Ramp1+
# ILC2.CC       7/22        Mki67+cycling
# ILC2.1        16            Areg+Il5+Klrg1+Il13+Csf2(lo)Stab2(lo)    still a little part cycling  
# ILC2.2        19            Areg+Il5+Klrg1(lo)Il13(lo)Csf2+Stab2+
##
# BAS           31         Gata2+Il6+Cd200r3+            (small number, and seems very few cells may mix with ILC2)
# 
# EOS           18         Serpinb2+   Siglecf/Ear6/Ccr3/Adgre1(F4/80)+ but not many detected in singlecell data
#                                Adgre1(F4/80)      also some in Bcell,Mono,IM,AM     AM higher than IM                               
#
# NEU1/2/3      1,  6,2,  9, 8             Cd14/S100a8/S100a9/Csf3r hi    Cxcr2/Mmp9/Hp  decrease from NEU1 to NEU3
#                             NEU1    Mmp8+ little Ly6g detected        
#                             NEU2    Csf1(lo)C3(lo)           
#                             NEU3   Csf1+C3+Il1a+Icam1+Maff+Tnf+  
#                    Itgam(CD11b)+     also in EOS, cDC2, Mono, IM         few in AM 
#
# pDC          17      Siglech+Ccr9+Ly6c1+Ly6c2+    P2ry14+ (also in cDC1, AM)
# cDC1         30      Xcr1+Clec9a+                   Itgae(CD103)+   also some in Treg            partly  Mki67+cycling
# cDC2         25      Cd209a+Clec10a  (some few also in pDC)
# migDC        23      Ccl17+Ccl22+Ccr7+Fscn1+         (some also in IM1, the boundary between them two might be not very clear)
##
# sometimes for Monocyte/Macrophage(/DC)  clustering/annotation, it might be a little complicated to describe with the joint parts
#       because they all, or some subsets all co-expressed a few myeloid pan-markers, and have mixed high and low levels
#       here just try to make it clear  (current marker list is manually selected from top60/120 markers)
#
# Mono1/2/3    14,  3,  20       Lyz2/Mafb highest (not for Mono3)   Plac8 highest (also high in pDC)    Gpr141+Cx3cr1+
#              Mono1    Ly6c1/Ly6c2+  F13a1+  MHCII (-/lo)  Ms4a4c higher  Ahr higher
#              Mono2    Ly6c1/Ly6c2-   F13a1(lo)   MHCII+ like H2-Aa/Eb1/DMa..    Cd86 higher  Aif1 higher
#                         looks more likely to DC/IM 
#              Mono3    Ly6c1/Ly6c2-   F13a1-   MHCII+ (lower than Mono2)  Ace/Adgre4/Gngt2 higher  Lira5+
#                         Lira5+Gngt2+  seems like it could shift to AM              
#
# IM1/2/3      15,  10,   21           Retnla+ (low in IM3)   Itgax(Cd11c)+Mrc1+Dab2+ (higher than Mono/DC, but lower than AM)
#              IM1            Fcrls+Hr+  tissue-resident like      Cd86+Ccl17+Cd81+Igf1-F7+        Mmp12/Itgax higher    
#              IM2            C1qa/b/c+     Cd86+Ccl17-Cd81-Igf1+F7(lo)    Mafb/Aif1/Csf1r higher
#              IM3            C1qa/b/c(lo)    Cd86-Ccl17-Cd81-Igf1+F7+    Ccl7+   Il11ra1+ 
#
# AM.CC        27               Mki67+cycling
# AM           4                 Il11ra1/Itgax(Cd11c)/Mrc1/Dab2/F7/Chil3  hi          Car4/Mgll/Krt79/Il18+
##
####
# Anno1
GEX.seur$Anno1 <- as.character(GEX.seur$seurat_clusters)

GEX.seur$Anno1[GEX.seur$Anno1 %in% c(26)] <- "Bcell"
GEX.seur$Anno1[GEX.seur$Anno1 %in% c(28)] <- "Plasma"

GEX.seur$Anno1[GEX.seur$Anno1 %in% c(11)] <- "CD8T"
GEX.seur$Anno1[GEX.seur$Anno1 %in% c(13)] <- "Th/Tin"
GEX.seur$Anno1[GEX.seur$Anno1 %in% c(24)] <- "Treg"
GEX.seur$Anno1[GEX.seur$Anno1 %in% c(12)] <- "NK.1"
GEX.seur$Anno1[GEX.seur$Anno1 %in% c(29,0,5)] <- "NK.2"

GEX.seur$Anno1[GEX.seur$Anno1 %in% c(7,22)] <- "ILC2.0"
GEX.seur$Anno1[GEX.seur$Anno1 %in% c(16)] <- "ILC2.1"
GEX.seur$Anno1[GEX.seur$Anno1 %in% c(19)] <- "ILC2.2"

GEX.seur$Anno1[GEX.seur$Anno1 %in% c(31)] <- "BAS"
GEX.seur$Anno1[GEX.seur$Anno1 %in% c(18)] <- "EOS"
GEX.seur$Anno1[GEX.seur$Anno1 %in% c(1)] <- "NEU.1"
GEX.seur$Anno1[GEX.seur$Anno1 %in% c(6,2)] <- "NEU.2"
GEX.seur$Anno1[GEX.seur$Anno1 %in% c(9,8)] <- "NEU.3"

GEX.seur$Anno1[GEX.seur$Anno1 %in% c(17)] <- "pDC"
GEX.seur$Anno1[GEX.seur$Anno1 %in% c(30)] <- "cDC1"
GEX.seur$Anno1[GEX.seur$Anno1 %in% c(25)] <- "cDC2"
GEX.seur$Anno1[GEX.seur$Anno1 %in% c(23)] <- "migDC"

GEX.seur$Anno1[GEX.seur$Anno1 %in% c(14)] <- "Mono.1"
GEX.seur$Anno1[GEX.seur$Anno1 %in% c(3)] <- "Mono.2"
GEX.seur$Anno1[GEX.seur$Anno1 %in% c(20)] <- "Mono.3"

GEX.seur$Anno1[GEX.seur$Anno1 %in% c(15)] <- "IM.1"
GEX.seur$Anno1[GEX.seur$Anno1 %in% c(10)] <- "IM.2"
GEX.seur$Anno1[GEX.seur$Anno1 %in% c(21)] <- "IM.3"

GEX.seur$Anno1[GEX.seur$Anno1 %in% c(27)] <- "AM.0"
GEX.seur$Anno1[GEX.seur$Anno1 %in% c(4)] <- "AM.1"

GEX.seur$Anno1 <- factor(GEX.seur$Anno1,
                         levels = c("Bcell","Plasma",
                                    "CD8T","Th/Tin","Treg",
                                    paste0("NK.",1:2),
                                    paste0("ILC2.",0:2),
                                    "BAS","EOS",
                                    paste0("NEU.",1:3),
                                    "pDC","cDC1","cDC2","migDC",
                                    paste0("Mono.",1:3),
                                    paste0("IM.",1:3),
                                    paste0("AM.",0:1)))



# Anno2
GEX.seur$Anno2 <- as.character(GEX.seur$seurat_clusters)

GEX.seur$Anno2[GEX.seur$Anno2 %in% c(26,28)] <- "Bcell"

GEX.seur$Anno2[GEX.seur$Anno2 %in% c(11,13,24)] <- "Tcell"
GEX.seur$Anno2[GEX.seur$Anno2 %in% c(12,29,0,5)] <- "NK"

GEX.seur$Anno2[GEX.seur$Anno2 %in% c(7,22,16,19)] <- "ILC2"

GEX.seur$Anno2[GEX.seur$Anno2 %in% c(31)] <- "BAS"
GEX.seur$Anno2[GEX.seur$Anno2 %in% c(18)] <- "EOS"
GEX.seur$Anno2[GEX.seur$Anno2 %in% c(1,6,2,9,8)] <- "NEU"

GEX.seur$Anno2[GEX.seur$Anno2 %in% c(17)] <- "pDC"
GEX.seur$Anno2[GEX.seur$Anno2 %in% c(30,25)] <- "cDC"
GEX.seur$Anno2[GEX.seur$Anno2 %in% c(23)] <- "migDC"

GEX.seur$Anno2[GEX.seur$Anno2 %in% c(14,3,20)] <- "Mono"

GEX.seur$Anno2[GEX.seur$Anno2 %in% c(15,10,21)] <- "IM"

GEX.seur$Anno2[GEX.seur$Anno2 %in% c(27,4)] <- "AM"

GEX.seur$Anno2 <- factor(GEX.seur$Anno2,
                         levels = c("Bcell",
                                    "Tcell","NK","ILC2",
                                    "BAS","EOS","NEU",
                                    "pDC","cDC","migDC",
                                    "Mono","IM","AM"))
levels(GEX.seur$Anno1)
##  [1] "Bcell"  "Plasma" "CD8T"   "Th/Tin" "Treg"   "NK.1"   "NK.2"   "ILC2.0"
##  [9] "ILC2.1" "ILC2.2" "BAS"    "EOS"    "NEU.1"  "NEU.2"  "NEU.3"  "pDC"   
## [17] "cDC1"   "cDC2"   "migDC"  "Mono.1" "Mono.2" "Mono.3" "IM.1"   "IM.2"  
## [25] "IM.3"   "AM.0"   "AM.1"
levels(GEX.seur$Anno2)
##  [1] "Bcell" "Tcell" "NK"    "ILC2"  "BAS"   "EOS"   "NEU"   "pDC"   "cDC"  
## [10] "migDC" "Mono"  "IM"    "AM"
length(levels(GEX.seur$Anno1))
## [1] 27
length(levels(GEX.seur$Anno2))
## [1] 13
pumap1 <- DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)
pumap1

GEX.seur$UMAP_1 <- GEX.seur@reductions[['umap']]@cell.embeddings[,1]
GEX.seur$UMAP_2 <- GEX.seur@reductions[['umap']]@cell.embeddings[,2]
pumap1s <- DimPlot(subset(GEX.seur, subset=Anno2 %in% c("ILC2") & UMAP_2 >8 ), reduction = "umap", label = T, group.by = "seurat_clusters") +
  DimPlot(subset(GEX.seur, subset=Anno2 %in% c("ILC2")  & UMAP_2 >8 ), reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)
pumap1s

scales::show_col(ggsci::pal_igv("default")(49))

scales::show_col(color.pre2)

# manually set annotation colors
color.A1 <-  ggsci::pal_igv("default")(49)[c(2,49,
                                             3,16,24,        # T
                                             6,14,           # NK
                                             7,1,5,          # ILC2
                                             8,10,            
                                             15,12,25,       # NEU
                                             22,
                                             18,9,           # cDC
                                             46,
                                             38,23,42,       # Mono
                                             4,34,21,        # IM
                                             43,30           # AM
                                             )]
names(color.A1) <- levels(GEX.seur$Anno1)

#
color.A2 <- c(ggsci::pal_igv("default")(49)[c(2,3,14,7,8,10,12,22,18,46,23,21,30)])
names(color.A2) <- levels(GEX.seur$Anno2)


#write.table(data.frame(ggsci.pal_igv=color.A1),"./figure20240407/1.LUNG_only/color.Anno1.txt",sep = "\t", col.names = T, row.names = T, quote = F)
#write.table(data.frame(ggsci.pal_igv=color.A2),"./figure20240407/1.LUNG_only/color.Anno2.txt",sep = "\t", col.names = T, row.names = T, quote = F)
pumap2 <-DimPlot(GEX.seur, reduction = "umap", label = T, repel = T, label.size = 2.5, group.by = "Anno1", cols = color.A1) + 
  DimPlot(GEX.seur, reduction = "umap", label = T, repel = T, label.size = 3.75, group.by = "Anno2", cols = color.A2)
pumap2

pumap2s <-DimPlot(subset(GEX.seur, subset=Anno2 %in% c("ILC2") & UMAP_2 >8 ), 
                  reduction = "umap", label = T, repel = T, label.size = 3.5, group.by = "Anno1", cols = color.A1) + 
  DimPlot(subset(GEX.seur, subset=Anno2 %in% c("ILC2") & UMAP_2 >8 ), 
          reduction = "umap", label = T, repel = T, label.size = 3.75, group.by = "Anno2", cols = color.A2)
pumap2s

pp.marker.a1 <- DotPlot(GEX.seur, features = check.markers1, group.by = "Anno1",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))+ scale_y_discrete(limits=rev)
pp.marker.a1

pp.marker.a2 <- DotPlot(GEX.seur, features = check.markers1, group.by = "Anno2",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))+ scale_y_discrete(limits=rev)
pp.marker.a2

length(check.markers1)
## [1] 110
#write.table(check.markers1,"figure20240407/1.LUNG_only/marker.final/marker.final.long.txt", col.names = F, row.names = F, quote = F)
pp.check1 <- list()

for(nn in check.markers1){
  
  pp.check1[[nn]] <- FeaturePlot(GEX.seur,features = nn, cols = c("lightgrey","red"))
  
  
}
cowplot::plot_grid(plotlist = pp.check1, ncol = 4)

length(pp.check1)
## [1] 110

QC distribution

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        ncol = 2, cols = color.A1,
        pt.size = 0, group.by = "Anno1") &
  geom_jitter(alpha=0.15, shape=16, width = 0.3 ,size = 0.12)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        ncol = 2, cols = color.A1,
        pt.size = 0, group.by = "Anno1")

Anno markers

# find markers for every cluster compared to all remaining cells, report only the positive ones
Idents(GEX.seur) <- "Anno1"

#GEX.markers.Anno1 <- FindAllMarkers(GEX.seur, only.pos = TRUE, min.pct = 0.05,
#                                  test.use = "MAST",
#                                  logfc.threshold = 0.25)
GEX.markers.Anno1 <- read.table("sc10x_RZB.markers.LUNG_Anno1.0410.csv", header = TRUE, sep = ",")
#GEX.markers.pre %>% group_by(cluster) %>% top_n(n = 8, wt = avg_log2FC)
GEX.markers.Anno1$cluster <- factor(as.character(GEX.markers.Anno1$cluster),
                          levels = levels(GEX.seur$Anno1))

markers.Anno1_t60 <- (GEX.markers.Anno1 %>% group_by(cluster) %>% 
                  filter(pct.1>0.05 & gene %in% grep("Rps|Rpl|mt-",GEX.markers.Anno1$gene,invert = T,value = T)) %>%
                   top_n(n = 60, wt = avg_log2FC) %>%
                    filter(p_val_adj < 0.01) %>%
                   ungroup() %>%
  arrange(desc(avg_log2FC*pct.1),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene

markers.Anno1_t120 <- (GEX.markers.Anno1 %>% group_by(cluster) %>% 
                  filter(pct.1>0.05 & gene %in% grep("Rps|Rpl|mt-",GEX.markers.Anno1$gene,invert = T,value = T)) %>%
                   top_n(n = 120, wt = avg_log2FC) %>%
                    filter(p_val_adj < 0.01) %>%
                   ungroup() %>%
  arrange(desc(avg_log2FC*pct.1),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene
ttt = 931
ttt/60
## [1] 15.51667
ttt/64
## [1] 14.54688
ttt/65
## [1] 14.32308
pp.Anno1.t60 <- list()

for(i in 1:16){
pp.Anno1.t60[[i]] <- DotPlot(GEX.seur, features = rev(markers.Anno1_t60[(60*i-59):(60*i)]))  + coord_flip() + 
           theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
}

pp.Anno1.t60
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

ttt = 1671
ttt/60
## [1] 27.85
ttt/64
## [1] 26.10938
ttt/65
## [1] 25.70769
pp.Anno1.t120 <- list()

for(i in 1:28){
pp.Anno1.t120[[i]] <- DotPlot(GEX.seur, features = rev(markers.Anno1_t120[(60*i-59):(60*i)]))  + coord_flip() + 
           theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
}

#pp.Anno1.t120

final markers

short version

check.markers2 <- c(
                   # Bcell
                   "Ebf1","Cd19","Sdc1","Jchain",
                   "Cd3d","Cd8a","Cd8b1",
                   "Tcf7","Icos","Cd4","Foxp3",
                   "Klrb1c","Ncr1","Eomes",#"Nkg7",
                   #"Ccl5","Gzma",
                   "Gzmb",
                   
                   # ILC2
                   "Mki67",
                   "Gata3","Il1rl1",
                   #"Ramp1","Areg","Klrg1",
                   "Il13","Il5","Calca",
                   "Csf2",#"Stab2",
                   
                   "Il6","Gata2","Cd200r3",
                   "Serpinb2","Siglecf",#"Ear6","Ccr3",
                   "Adgre1",#"Cd24a",
                   "Retnlg",
                   "Itgam","Cd14","S100a8",#"S100a9",
                   #"Csf3r","Cxcr2",
                   "Mmp8","Mmp9","Hp",
                   #"Ly6g","C3","Il1a","Icam1","Maff",
                   "Csf1","Tnf",
                   
                   "Siglech","Ccr9",
                   #"Ly6c1",
                   "Ly6c2",
                   "P2ry14","Itgae","Xcr1","Clec9a",
                   #"H2-Aa",
                   "H2-Eb1","H2-DMa","Cd209a","Clec10a",
                   #"Cd86","Ccl17",
                   "Ccl22","Ccr7","Fscn1",
                   #"Calcb",
                   
                   
                   "Lyz2",#"Mafb",
                   "Plac8","F13a1",
                   "Ms4a4c",#"Ahr",
                   
                   "Aif1","Csf1r","Gpr141",
                   
                   "Cx3cr1","Ace","Adgre4",
                   "Lilra5","Gngt2",
                   
                   "Retnla",
                   
                   "Fcrls","Hr",
                   "C1qa",#"C1qc",
                   "Igf1","Ccl7","Il11ra1",
                   
                   "Mmp12",
                   "Itgax","Mrc1","Dab2","F7",
                   "Car4"#,"Mgll","Krt79","Il18","Chil3"
                   
                   )
length(check.markers2)
## [1] 76
pp.short.Anno1 <- DotPlot(GEX.seur, features = check.markers2, group.by = "Anno1",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))+ scale_y_discrete(limits=rev)
pp.short.Anno1

pp.short.Anno2 <- DotPlot(GEX.seur, features = check.markers2, group.by = "Anno2",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))+ scale_y_discrete(limits=rev)
pp.short.Anno2

## define feature colors
# Cell2020     
material.heat = function(n){
  colorRampPalette(
    c(
      "#283593", # indigo 800
      "#3F51B5", # indigo
      "#2196F3", # blue
      "#00BCD4", # cyan
      "#4CAF50", # green
      "#8BC34A", # light green
      "#CDDC39", # lime
      "#FFEB3B", # yellow
      "#FFC107", # amber
      "#FF9800", # orange
      "#FF5722"  # deep orange
    )
  )(n)
}

# Immunity2019, na gray
colors.Immunity <-c("#191970","#121285","#0C0C9A","#0707B0","#0101C5","#0014CF","#0033D3","#0053D8","#0072DD","#0092E1","#00B2E6",
                  "#00D1EB","#23E8CD","#7AF17B","#D2FA29","#FFEB00","#FFC300","#FF9B00","#FF8400","#FF7800","#FF6B00","#FF5F00","#FF5300",
                  "#FF4700","#F73B00","#EF2E00","#E62300","#DD1700","#D50B00","#CD0000")


# NatNeur2021, sc-neurons
color.ref <- c("#8AB6CE","#678BB1","#3975C1","#4CC1BD",
               "#C03778","#97BA59","#DFAB16","#BF7E6B",
               "#D46B35","#BDAE8D","#BD66C4","#2BA956",
               "#FF8080","#FF8080","#FF8080","#FF0000")
pp.short.Anno1+ 
  scale_color_gradientn(colours  = material.heat(100))

pp.short.Anno2+ 
  scale_color_gradientn(colours  = material.heat(100))

pp.short.Anno1.t <- DotPlot(GEX.seur, features = check.markers2, group.by = "Anno1",
        cols = c("midnightblue","darkorange1")) +
  coord_flip() +
 theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4,size = 9.15))+ 
  scale_color_gradientn(colours  = material.heat(100))
  #scale_x_discrete(limits=rev) + scale_y_discrete(limits=rev) + 
pp.short.Anno1.t

pp.short.Anno2.t <- DotPlot(GEX.seur, features = check.markers2, group.by = "Anno2",
        cols = c("midnightblue","darkorange1")) +
  coord_flip() +
 theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4,size = 9.15))+ 
  scale_color_gradientn(colours  = material.heat(100))
  #scale_x_discrete(limits=rev) + scale_y_discrete(limits=rev) + 
pp.short.Anno2.t

#write.table(check.markers2,"figure20240407/1.LUNG_only/marker.final/marker.final.short.txt", col.names = F, row.names = F, quote = F)

cell composition

pumap3 <- DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "FB.info", split.by = "FB.info", 
        ncol = 2, cols = color.FB)
pumap3

pumap4 <- DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "Anno1", split.by = "FB.info", 
        ncol = 2, cols = color.A1)
pumap4

pumap5 <- DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "Anno2", split.by = "FB.info", 
        ncol = 2, cols = color.A2)
pumap5

stacked bar

Anno1

GEX.seur$cnt <- factor(as.character(GEX.seur$cnt),
                  levels = c("LUNG.CTL","LUNG.CKO"))
stat.A1.a <- table(GEX.seur@meta.data[,c("cnt","Anno1")])
stat.A1.a
##           Anno1
## cnt        Bcell Plasma CD8T Th/Tin Treg NK.1 NK.2 ILC2.0 ILC2.1 ILC2.2 BAS EOS
##   LUNG.CTL    62     50  106    121   68  221  735    448    190    119  23 114
##   LUNG.CKO    73     66  327    259   82  206  646    203    128    102  19 125
##           Anno1
## cnt        NEU.1 NEU.2 NEU.3 pDC cDC1 cDC2 migDC Mono.1 Mono.2 Mono.3 IM.1 IM.2
##   LUNG.CTL   381   567   328 126   49   75    86    142    251     76  197  206
##   LUNG.CKO   397   692   578 137   34   72    67    207    303    121  132  235
##           Anno1
## cnt        IM.3 AM.0 AM.1
##   LUNG.CTL   98   78  309
##   LUNG.CKO   97   47  219
melt.A1.a <- reshape2::melt(stat.A1.a)
melt.A1.a$Anno1 <- factor(as.character(melt.A1.a$Anno1),
                                levels = rev(levels(GEX.seur$Anno1)))

#color.stat1 <- color.pre1
#names(color.stat1) <- levels(GEX.seur$Anno1)

pstat.A1.a <- ggplot(melt.A1.a, aes(cnt, weight=value, fill=Anno1)) +
  geom_bar(color="black", width = .7, position = "fill") +
  labs( y = "Proportion of total (%)",title = "",x="") +
  scale_fill_manual(values = color.A1[rev(levels(GEX.seur$Anno1))]) +
  scale_y_continuous(expand = c(0,0), n.breaks = 6, labels = c(0,20,40,60,80,100)) + 
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 13.6),
        axis.text.y = element_text(size = 16))+
  guides(fill = guide_legend(reverse = T, title = ""))
pstat.A1.a

stat.A1.b <- table(GEX.seur@meta.data[,c("FB.info","Anno1")])
stat.A1.b
##            Anno1
## FB.info     Bcell Plasma CD8T Th/Tin Treg NK.1 NK.2 ILC2.0 ILC2.1 ILC2.2 BAS
##   LUNG.CTL1    28     16   45     72   23  115  383    267    106     83  10
##   LUNG.CTL2    34     34   61     49   45  106  352    181     84     36  13
##   LUNG.CKO1    39     43  142    173   33   92  299    100     57     42   7
##   LUNG.CKO2    34     23  185     86   49  114  347    103     71     60  12
##            Anno1
## FB.info     EOS NEU.1 NEU.2 NEU.3 pDC cDC1 cDC2 migDC Mono.1 Mono.2 Mono.3 IM.1
##   LUNG.CTL1  46   168   300   146  60   21   36    57     66     92     42  100
##   LUNG.CTL2  68   213   267   182  66   28   39    29     76    159     34   97
##   LUNG.CKO1  55   209   343   358  46   11   33    32     94    128     62   52
##   LUNG.CKO2  70   188   349   220  91   23   39    35    113    175     59   80
##            Anno1
## FB.info     IM.2 IM.3 AM.0 AM.1
##   LUNG.CTL1  106   54   50  207
##   LUNG.CTL2  100   44   28  102
##   LUNG.CKO1  131   51   29  111
##   LUNG.CKO2  104   46   18  108
melt.A1.b <- reshape2::melt(stat.A1.b)
melt.A1.b$Anno1 <- factor(as.character(melt.A1.b$Anno1),
                                levels = rev(levels(GEX.seur$Anno1)))

#color.stat1 <- color.pre1
#names(color.stat1) <- levels(GEX.seur$Anno1)

pstat.A1.b <- ggplot(melt.A1.b, aes(FB.info, weight=value, fill=Anno1)) +
  geom_bar(color="black", width = .7, position = "fill") +
  labs( y = "Proportion of total (%)",title = "",x="") +
  scale_fill_manual(values = color.A1[rev(levels(GEX.seur$Anno1))]) +
  scale_y_continuous(expand = c(0,0), n.breaks = 6, labels = c(0,20,40,60,80,100)) + 
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 13.6),
        axis.text.y = element_text(size = 16))+
  guides(fill = guide_legend(reverse = T, title = ""))
pstat.A1.b

Anno2

#GEX.seur$cnt <- factor(as.character(GEX.seur$cnt),
#                  levels = c("LUNG.CTL","LUNG.CKO"))
stat.A2.a <- table(GEX.seur@meta.data[,c("cnt","Anno2")])
stat.A2.a
##           Anno2
## cnt        Bcell Tcell   NK ILC2  BAS  EOS  NEU  pDC  cDC migDC Mono   IM   AM
##   LUNG.CTL   112   295  956  757   23  114 1276  126  124    86  469  501  387
##   LUNG.CKO   139   668  852  433   19  125 1667  137  106    67  631  464  266
melt.A2.a <- reshape2::melt(stat.A2.a)
melt.A2.a$Anno2 <- factor(as.character(melt.A2.a$Anno2),
                                levels = rev(levels(GEX.seur$Anno2)))

#color.stat1 <- color.pre1
#names(color.stat1) <- levels(GEX.seur$Anno2)

pstat.A2.a <- ggplot(melt.A2.a, aes(cnt, weight=value, fill=Anno2)) +
  geom_bar(color="black", width = .7, position = "fill") +
  labs( y = "Proportion of total (%)",title = "",x="") +
  scale_fill_manual(values = color.A2[rev(levels(GEX.seur$Anno2))]) +
  scale_y_continuous(expand = c(0,0), n.breaks = 6, labels = c(0,20,40,60,80,100)) + 
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 13.6),
        axis.text.y = element_text(size = 16))+
  guides(fill = guide_legend(reverse = T, title = ""))
pstat.A2.a

stat.A2.b <- table(GEX.seur@meta.data[,c("FB.info","Anno2")])
stat.A2.b
##            Anno2
## FB.info     Bcell Tcell  NK ILC2 BAS EOS NEU pDC cDC migDC Mono  IM  AM
##   LUNG.CTL1    44   140 498  456  10  46 614  60  57    57  200 260 257
##   LUNG.CTL2    68   155 458  301  13  68 662  66  67    29  269 241 130
##   LUNG.CKO1    82   348 391  199   7  55 910  46  44    32  284 234 140
##   LUNG.CKO2    57   320 461  234  12  70 757  91  62    35  347 230 126
melt.A2.b <- reshape2::melt(stat.A2.b)
melt.A2.b$Anno2 <- factor(as.character(melt.A2.b$Anno2),
                                levels = rev(levels(GEX.seur$Anno2)))

#color.stat1 <- color.pre1
#names(color.stat1) <- levels(GEX.seur$Anno2)

pstat.A2.b <- ggplot(melt.A2.b, aes(FB.info, weight=value, fill=Anno2)) +
  geom_bar(color="black", width = .7, position = "fill") +
  labs( y = "Proportion of total (%)",title = "",x="") +
  scale_fill_manual(values = color.A2[rev(levels(GEX.seur$Anno2))]) +
  scale_y_continuous(expand = c(0,0), n.breaks = 6, labels = c(0,20,40,60,80,100)) + 
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 13.6),
        axis.text.y = element_text(size = 16))+
  guides(fill = guide_legend(reverse = T, title = ""))
pstat.A2.b

heatmap

cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.seur$FB.info,
      clusters=GEX.seur$Anno1),
                   main = "Cell Count",
                   gaps_row = c(2),
      #gaps_col = c(4,7,10),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,

pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.seur$FB.info,
                                clusters=GEX.seur$Anno1)),
                   main = "Cell Ratio",
                   gaps_row = c(2),
      #gaps_col = c(4,7,10),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)

cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.seur$FB.info,
      clusters=GEX.seur$Anno2),
                   main = "Cell Count",
                   gaps_row = c(2),
      #gaps_col = c(4,7,10),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,

pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.seur$FB.info,
                                clusters=GEX.seur$Anno2)),
                   main = "Cell Ratio",
                   gaps_row = c(2),
      #gaps_col = c(4,7,10),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)

barplot

new

stat_Anno1 <- GEX.seur@meta.data[,c("cnt","rep","Anno1")]

stat_Anno1.s <- stat_Anno1 %>%
  group_by(cnt,rep,Anno1) %>%
  summarise(count=n()) %>%
  mutate(prop= count/sum(count)) %>%
  ungroup
## `summarise()` has grouped output by 'cnt', 'rep'. You can override using the
## `.groups` argument.
pcom.Anno1 <- stat_Anno1.s %>%
  ggplot(aes(x = Anno1, y = 100*prop, color=cnt)) +
  geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
  theme_classic(base_size = 15) +
  scale_color_manual(values = c(color.cnt), name="") +
  labs(title="Cell Composition of Lung immune data, Anno1", y = "Proportion") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6)) +
  
  geom_point(aes(x=Anno1, y = 100*prop, color= cnt),
             position = position_dodge(0.75),
             shape=16,alpha=0.75,size=2.15,
             stroke=0.15, show.legend=F)

pcom.Anno1

stat_Anno2 <- GEX.seur@meta.data[,c("cnt","rep","Anno2")]

stat_Anno2.s <- stat_Anno2 %>%
  group_by(cnt,rep,Anno2) %>%
  summarise(count=n()) %>%
  mutate(prop= count/sum(count)) %>%
  ungroup
## `summarise()` has grouped output by 'cnt', 'rep'. You can override using the
## `.groups` argument.
pcom.Anno2 <- stat_Anno2.s %>%
  ggplot(aes(x = Anno2, y = 100*prop, color=cnt)) +
  geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
  theme_classic(base_size = 15) +
  scale_color_manual(values = c(color.cnt), name="") +
  labs(title="Cell Composition of Lung immune data, Anno2", y = "Proportion") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6)) +
  
  geom_point(aes(x=Anno2, y = 100*prop, color= cnt),
             position = position_dodge(0.75),
             shape=16,alpha=0.75,size=2.15,
             stroke=0.15, show.legend=F)

pcom.Anno2

sig.test

glm.nb - anova.LRT

Sort.N <- c("LUNG.CTL","LUNG.CKO")

glm.nb_anova.Anno1 <- list()

for(x1 in 1:2){
  for(x2 in 1:2){
    if(x2>x1){
      
      stat_Anno1.s_N <- stat_Anno1.s %>% filter(cnt %in% c(Sort.N[x1],Sort.N[x2]))
      
      
      total.N <- stat_Anno1.s_N %>%
        group_by(cnt, rep) %>%
        summarise(total=sum(count)) %>% ungroup() %>% as.data.frame()
      
      rownames(total.N) <- paste0(as.character(total.N$cnt),
                                  "_",
                                  as.character(total.N$rep))
      
      stat_Anno1.s_N$total <- total.N[paste0(stat_Anno1.s_N$cnt,
                                            "_",
                                            stat_Anno1.s_N$rep),"total"]
      
      glm.nb_anova.Anno1[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- list()
      
      for(AA in levels(GEX.seur$Anno1)){
        glm.nb_anova.Anno1[[paste0(Sort.N[x1],"vs",Sort.N[x2])]][[AA]] <- tryCatch({
          aaa <- stat_Anno1.s_N %>% filter(Anno1==AA)
          aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=1000)
          aaa.anova <- anova(aaa.glmnb, test = "LRT")
          aaa.anova$'Pr(>Chi)'[2]
        }, error=function(e){
        
          tryCatch({
            aaa <- stat_Anno1.s_N %>% filter(Anno1==AA)
            aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=100)
            aaa.anova <- anova(aaa.glmnb, test = "LRT")
            aaa.anova$'Pr(>Chi)'[2]
          }, error=function(e){
            
            tryCatch({
              aaa <- stat_Anno1.s_N %>% filter(Anno1==AA)
              aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=10)
              aaa.anova <- anova(aaa.glmnb, test = "LRT")
              aaa.anova$'Pr(>Chi)'[2]
            }, error = function(e){
              NA
            })     
          })
        })
      }
      glm.nb_anova.Anno1[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- unlist(glm.nb_anova.Anno1[[paste0(Sort.N[x1],"vs",Sort.N[x2])]]) 
    }
  }
}
glm.nb_anova.Anno1_df1 <- t(data.frame(Reduce(function(x,y){rbind(x,y)},glm.nb_anova.Anno1)))
rownames(glm.nb_anova.Anno1_df1) <- names(glm.nb_anova.Anno1)
colnames(glm.nb_anova.Anno1_df1) <- gsub("X","C",colnames(glm.nb_anova.Anno1_df1))
glm.nb_anova.Anno1_df1
##                        Bcell    Plasma         CD8T     Th/Tin      Treg
## LUNG.CTLvsLUNG.CKO 0.5666052 0.5638712 2.298675e-12 0.01297461 0.6866299
##                         NK.1         NK.2       ILC2.0      ILC2.1    ILC2.2
## LUNG.CTLvsLUNG.CKO 0.1639279 0.0003259607 1.011669e-10 4.88273e-05 0.4761381
##                          BAS       EOS     NEU.1      NEU.2      NEU.3      pDC
## LUNG.CTLvsLUNG.CKO 0.4085462 0.8923079 0.8186386 0.01715316 0.01506928 0.947222
##                          cDC1      cDC2     migDC      Mono.1    Mono.2
## LUNG.CTLvsLUNG.CKO 0.09319647 0.5233529 0.1692928 0.004276802 0.6288162
##                         Mono.3         IM.1      IM.2      IM.3       AM.0
## LUNG.CTLvsLUNG.CKO 0.005688392 0.0001906926 0.4808275 0.6018842 0.02432466
##                          AM.1
## LUNG.CTLvsLUNG.CKO 0.08143052
round(glm.nb_anova.Anno1_df1,4)
##                     Bcell Plasma CD8T Th/Tin   Treg   NK.1  NK.2 ILC2.0 ILC2.1
## LUNG.CTLvsLUNG.CKO 0.5666 0.5639    0  0.013 0.6866 0.1639 3e-04      0      0
##                    ILC2.2    BAS    EOS  NEU.1  NEU.2  NEU.3    pDC   cDC1
## LUNG.CTLvsLUNG.CKO 0.4761 0.4085 0.8923 0.8186 0.0172 0.0151 0.9472 0.0932
##                      cDC2  migDC Mono.1 Mono.2 Mono.3  IM.1   IM.2   IM.3
## LUNG.CTLvsLUNG.CKO 0.5234 0.1693 0.0043 0.6288 0.0057 2e-04 0.4808 0.6019
##                      AM.0   AM.1
## LUNG.CTLvsLUNG.CKO 0.0243 0.0814
Sort.N <- c("LUNG.CTL","LUNG.CKO")

glm.nb_anova.Anno2 <- list()

for(x1 in 1:2){
  for(x2 in 1:2){
    if(x2>x1){
      
      stat_Anno2.s_N <- stat_Anno2.s %>% filter(cnt %in% c(Sort.N[x1],Sort.N[x2]))
      
      
      total.N <- stat_Anno2.s_N %>%
        group_by(cnt, rep) %>%
        summarise(total=sum(count)) %>% ungroup() %>% as.data.frame()
      
      rownames(total.N) <- paste0(as.character(total.N$cnt),
                                  "_",
                                  as.character(total.N$rep))
      
      stat_Anno2.s_N$total <- total.N[paste0(stat_Anno2.s_N$cnt,
                                            "_",
                                            stat_Anno2.s_N$rep),"total"]
      
      glm.nb_anova.Anno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- list()
      
      for(AA in levels(GEX.seur$Anno2)){
        glm.nb_anova.Anno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]][[AA]] <- tryCatch({
          aaa <- stat_Anno2.s_N %>% filter(Anno2==AA)
          aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=1000)
          aaa.anova <- anova(aaa.glmnb, test = "LRT")
          aaa.anova$'Pr(>Chi)'[2]
        }, error=function(e){
        
          tryCatch({
            aaa <- stat_Anno2.s_N %>% filter(Anno2==AA)
            aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=100)
            aaa.anova <- anova(aaa.glmnb, test = "LRT")
            aaa.anova$'Pr(>Chi)'[2]
          }, error=function(e){
            
            tryCatch({
              aaa <- stat_Anno2.s_N %>% filter(Anno2==AA)
              aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=10)
              aaa.anova <- anova(aaa.glmnb, test = "LRT")
              aaa.anova$'Pr(>Chi)'[2]
            }, error = function(e){
              NA
            })     
          })
        })
      }
      glm.nb_anova.Anno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- unlist(glm.nb_anova.Anno2[[paste0(Sort.N[x1],"vs",Sort.N[x2])]]) 
    }
  }
}
glm.nb_anova.Anno2_df1 <- t(data.frame(Reduce(function(x,y){rbind(x,y)},glm.nb_anova.Anno2)))
rownames(glm.nb_anova.Anno2_df1) <- names(glm.nb_anova.Anno2)
colnames(glm.nb_anova.Anno2_df1) <- gsub("X","C",colnames(glm.nb_anova.Anno2_df1))
glm.nb_anova.Anno2_df1
##                        Bcell        Tcell           NK         ILC2       BAS
## LUNG.CTLvsLUNG.CKO 0.4989486 4.425649e-29 0.0005788733 9.054916e-06 0.4085462
##                          EOS        NEU      pDC       cDC     migDC      Mono
## LUNG.CTLvsLUNG.CKO 0.8923079 0.01875244 0.947222 0.1087304 0.1692928 0.1096035
##                           IM         AM
## LUNG.CTLvsLUNG.CKO 0.0283489 0.05416889
round(glm.nb_anova.Anno2_df1,4)
##                     Bcell Tcell    NK ILC2    BAS    EOS    NEU    pDC    cDC
## LUNG.CTLvsLUNG.CKO 0.4989     0 6e-04    0 0.4085 0.8923 0.0188 0.9472 0.1087
##                     migDC   Mono     IM     AM
## LUNG.CTLvsLUNG.CKO 0.1693 0.1096 0.0283 0.0542

set contrast

set contrast for parallel comparisons

levels(GEX.seur$Anno1)
##  [1] "Bcell"  "Plasma" "CD8T"   "Th/Tin" "Treg"   "NK.1"   "NK.2"   "ILC2.0"
##  [9] "ILC2.1" "ILC2.2" "BAS"    "EOS"    "NEU.1"  "NEU.2"  "NEU.3"  "pDC"   
## [17] "cDC1"   "cDC2"   "migDC"  "Mono.1" "Mono.2" "Mono.3" "IM.1"   "IM.2"  
## [25] "IM.3"   "AM.0"   "AM.1"
levels(GEX.seur$Anno2)
##  [1] "Bcell" "Tcell" "NK"    "ILC2"  "BAS"   "EOS"   "NEU"   "pDC"   "cDC"  
## [10] "migDC" "Mono"  "IM"    "AM"
levels(GEX.seur$cnt)
## [1] "LUNG.CTL" "LUNG.CKO"

cnt1

level.cnt1 <- as.vector(sapply(levels(GEX.seur$Anno1),function(x){
  paste0(x,c(".CTL",".CKO"))
}))
level.cnt1
##  [1] "Bcell.CTL"  "Bcell.CKO"  "Plasma.CTL" "Plasma.CKO" "CD8T.CTL"  
##  [6] "CD8T.CKO"   "Th/Tin.CTL" "Th/Tin.CKO" "Treg.CTL"   "Treg.CKO"  
## [11] "NK.1.CTL"   "NK.1.CKO"   "NK.2.CTL"   "NK.2.CKO"   "ILC2.0.CTL"
## [16] "ILC2.0.CKO" "ILC2.1.CTL" "ILC2.1.CKO" "ILC2.2.CTL" "ILC2.2.CKO"
## [21] "BAS.CTL"    "BAS.CKO"    "EOS.CTL"    "EOS.CKO"    "NEU.1.CTL" 
## [26] "NEU.1.CKO"  "NEU.2.CTL"  "NEU.2.CKO"  "NEU.3.CTL"  "NEU.3.CKO" 
## [31] "pDC.CTL"    "pDC.CKO"    "cDC1.CTL"   "cDC1.CKO"   "cDC2.CTL"  
## [36] "cDC2.CKO"   "migDC.CTL"  "migDC.CKO"  "Mono.1.CTL" "Mono.1.CKO"
## [41] "Mono.2.CTL" "Mono.2.CKO" "Mono.3.CTL" "Mono.3.CKO" "IM.1.CTL"  
## [46] "IM.1.CKO"   "IM.2.CTL"   "IM.2.CKO"   "IM.3.CTL"   "IM.3.CKO"  
## [51] "AM.0.CTL"   "AM.0.CKO"   "AM.1.CTL"   "AM.1.CKO"
# for violin comparison
list.cnt1 <- lapply(levels(GEX.seur$Anno1),function(x){
  paste0(x,c(".CTL",".CKO"))
})
list.cnt1
## [[1]]
## [1] "Bcell.CTL" "Bcell.CKO"
## 
## [[2]]
## [1] "Plasma.CTL" "Plasma.CKO"
## 
## [[3]]
## [1] "CD8T.CTL" "CD8T.CKO"
## 
## [[4]]
## [1] "Th/Tin.CTL" "Th/Tin.CKO"
## 
## [[5]]
## [1] "Treg.CTL" "Treg.CKO"
## 
## [[6]]
## [1] "NK.1.CTL" "NK.1.CKO"
## 
## [[7]]
## [1] "NK.2.CTL" "NK.2.CKO"
## 
## [[8]]
## [1] "ILC2.0.CTL" "ILC2.0.CKO"
## 
## [[9]]
## [1] "ILC2.1.CTL" "ILC2.1.CKO"
## 
## [[10]]
## [1] "ILC2.2.CTL" "ILC2.2.CKO"
## 
## [[11]]
## [1] "BAS.CTL" "BAS.CKO"
## 
## [[12]]
## [1] "EOS.CTL" "EOS.CKO"
## 
## [[13]]
## [1] "NEU.1.CTL" "NEU.1.CKO"
## 
## [[14]]
## [1] "NEU.2.CTL" "NEU.2.CKO"
## 
## [[15]]
## [1] "NEU.3.CTL" "NEU.3.CKO"
## 
## [[16]]
## [1] "pDC.CTL" "pDC.CKO"
## 
## [[17]]
## [1] "cDC1.CTL" "cDC1.CKO"
## 
## [[18]]
## [1] "cDC2.CTL" "cDC2.CKO"
## 
## [[19]]
## [1] "migDC.CTL" "migDC.CKO"
## 
## [[20]]
## [1] "Mono.1.CTL" "Mono.1.CKO"
## 
## [[21]]
## [1] "Mono.2.CTL" "Mono.2.CKO"
## 
## [[22]]
## [1] "Mono.3.CTL" "Mono.3.CKO"
## 
## [[23]]
## [1] "IM.1.CTL" "IM.1.CKO"
## 
## [[24]]
## [1] "IM.2.CTL" "IM.2.CKO"
## 
## [[25]]
## [1] "IM.3.CTL" "IM.3.CKO"
## 
## [[26]]
## [1] "AM.0.CTL" "AM.0.CKO"
## 
## [[27]]
## [1] "AM.1.CTL" "AM.1.CKO"
GEX.seur$cnt1 <- factor(paste0(as.character(GEX.seur$Anno1),
                               ".",
                               gsub("LUNG.","",as.character(GEX.seur$cnt))),
                        levels = level.cnt1)

cnt2

level.cnt2 <- as.vector(sapply(levels(GEX.seur$Anno2),function(x){
  paste0(x,c(".CTL",".CKO"))
}))
level.cnt2
##  [1] "Bcell.CTL" "Bcell.CKO" "Tcell.CTL" "Tcell.CKO" "NK.CTL"    "NK.CKO"   
##  [7] "ILC2.CTL"  "ILC2.CKO"  "BAS.CTL"   "BAS.CKO"   "EOS.CTL"   "EOS.CKO"  
## [13] "NEU.CTL"   "NEU.CKO"   "pDC.CTL"   "pDC.CKO"   "cDC.CTL"   "cDC.CKO"  
## [19] "migDC.CTL" "migDC.CKO" "Mono.CTL"  "Mono.CKO"  "IM.CTL"    "IM.CKO"   
## [25] "AM.CTL"    "AM.CKO"
# for violin comparison
list.cnt2 <- lapply(levels(GEX.seur$Anno2),function(x){
  paste0(x,c(".CTL",".CKO"))
})
list.cnt2
## [[1]]
## [1] "Bcell.CTL" "Bcell.CKO"
## 
## [[2]]
## [1] "Tcell.CTL" "Tcell.CKO"
## 
## [[3]]
## [1] "NK.CTL" "NK.CKO"
## 
## [[4]]
## [1] "ILC2.CTL" "ILC2.CKO"
## 
## [[5]]
## [1] "BAS.CTL" "BAS.CKO"
## 
## [[6]]
## [1] "EOS.CTL" "EOS.CKO"
## 
## [[7]]
## [1] "NEU.CTL" "NEU.CKO"
## 
## [[8]]
## [1] "pDC.CTL" "pDC.CKO"
## 
## [[9]]
## [1] "cDC.CTL" "cDC.CKO"
## 
## [[10]]
## [1] "migDC.CTL" "migDC.CKO"
## 
## [[11]]
## [1] "Mono.CTL" "Mono.CKO"
## 
## [[12]]
## [1] "IM.CTL" "IM.CKO"
## 
## [[13]]
## [1] "AM.CTL" "AM.CKO"
GEX.seur$cnt2 <- factor(paste0(as.character(GEX.seur$Anno2),
                               ".",
                               gsub("LUNG.","",as.character(GEX.seur$cnt))),
                        levels = level.cnt2)

DEGs

GEX.seur@meta.data[,grep("snn|pANN",colnames(GEX.seur@meta.data))] <- NULL
head(GEX.seur@meta.data)
##                    orig.ident nCount_RNA nFeature_RNA percent.mt percent.rb
## AAACCCAAGGTAAGGA-1   Lung_GEX       1339          661  0.2240478   3.286034
## AAACCCACAAACGTGG-1   Lung_GEX       1838         1077  3.1011970  13.003264
## AAACCCACAACACAAA-1   Lung_GEX       1362          745  0.3671072   2.863436
## AAACCCACAAGCTGCC-1   Lung_GEX       1415          682  0.4946996   2.756184
## AAACCCACACAAACGG-1   Lung_GEX       1312          649  0.3810976   1.905488
## AAACCCACAGCAAGAC-1   Lung_GEX        744          465  0.4032258   5.913978
##                      FB.info      S.Score   G2M.Score Phase seurat_clusters
## AAACCCAAGGTAAGGA-1 LUNG.CTL2 -0.058333333 -0.03422477    G1               2
## AAACCCACAAACGTGG-1 LUNG.CKO2 -0.017095792 -0.04855142    G1               0
## AAACCCACAACACAAA-1 LUNG.CTL2 -0.055357143 -0.08558845    G1               6
## AAACCCACAAGCTGCC-1 LUNG.CKO1 -0.059523810 -0.07794758    G1               6
## AAACCCACACAAACGG-1 LUNG.CKO1 -0.076785714 -0.02419612    G1               9
## AAACCCACAGCAAGAC-1 LUNG.CTL1  0.006035437  0.01867770   G2M               2
##                         cnt DoubletFinder0.05 DoubletFinder0.1 sort_clusters
## AAACCCAAGGTAAGGA-1 LUNG.CTL           Singlet          Singlet             2
## AAACCCACAAACGTGG-1 LUNG.CKO           Singlet          Singlet             0
## AAACCCACAACACAAA-1 LUNG.CTL           Singlet          Singlet             6
## AAACCCACAAGCTGCC-1 LUNG.CKO           Singlet          Singlet             6
## AAACCCACACAAACGG-1 LUNG.CKO           Singlet          Singlet             9
## AAACCCACAGCAAGAC-1 LUNG.CTL           Singlet          Singlet             2
##                      SP.info preAnno1 preAnno2  rep Anno1 Anno2   UMAP_1
## AAACCCAAGGTAAGGA-1 LUNG.CTL2     NEU5      NEU rep2 NEU.2   NEU 7.838557
## AAACCCACAAACGTGG-1 LUNG.CKO2      NK2       NK rep2  NK.2    NK 5.685989
## AAACCCACAACACAAA-1 LUNG.CTL2     NEU2      NEU rep2 NEU.2   NEU 6.848635
## AAACCCACAAGCTGCC-1 LUNG.CKO1     NEU1      NEU rep1 NEU.2   NEU 8.051816
## AAACCCACACAAACGG-1 LUNG.CKO1     NEU4      NEU rep1 NEU.3   NEU 4.011680
## AAACCCACAGCAAGAC-1 LUNG.CTL1     NEU1      NEU rep1 NEU.2   NEU 9.316029
##                       UMAP_2      cnt1    cnt2
## AAACCCAAGGTAAGGA-1 -5.716607 NEU.2.CTL NEU.CTL
## AAACCCACAAACGTGG-1  5.990192  NK.2.CKO  NK.CKO
## AAACCCACAACACAAA-1 -4.713001 NEU.2.CTL NEU.CTL
## AAACCCACAAGCTGCC-1 -3.495369 NEU.2.CKO NEU.CKO
## AAACCCACACAAACGG-1 -6.262844 NEU.3.CKO NEU.CKO
## AAACCCACAGCAAGAC-1 -4.598191 NEU.2.CTL NEU.CTL
#saveRDS(GEX.seur, "./sc10x_RZB.LUNG_Anno.rds")
#GEX.seur <- readRDS("./sc10x_RZB.LUNG_Anno.rds")
Idents(GEX.seur) <- "cnt"

Anno1

all.Anno1 <- levels(GEX.seur$Anno1)
all.Anno1
##  [1] "Bcell"  "Plasma" "CD8T"   "Th/Tin" "Treg"   "NK.1"   "NK.2"   "ILC2.0"
##  [9] "ILC2.1" "ILC2.2" "BAS"    "EOS"    "NEU.1"  "NEU.2"  "NEU.3"  "pDC"   
## [17] "cDC1"   "cDC2"   "migDC"  "Mono.1" "Mono.2" "Mono.3" "IM.1"   "IM.2"  
## [25] "IM.3"   "AM.0"   "AM.1"
#
list_new.Anno1 <- list()
list_new.Anno1[["All"]] <- all.Anno1

for(NN in all.Anno1){
  list_new.Anno1[[NN]] <- NN
}

names_new.Anno1 <- names(list_new.Anno1)
list_new.Anno1
## $All
##  [1] "Bcell"  "Plasma" "CD8T"   "Th/Tin" "Treg"   "NK.1"   "NK.2"   "ILC2.0"
##  [9] "ILC2.1" "ILC2.2" "BAS"    "EOS"    "NEU.1"  "NEU.2"  "NEU.3"  "pDC"   
## [17] "cDC1"   "cDC2"   "migDC"  "Mono.1" "Mono.2" "Mono.3" "IM.1"   "IM.2"  
## [25] "IM.3"   "AM.0"   "AM.1"  
## 
## $Bcell
## [1] "Bcell"
## 
## $Plasma
## [1] "Plasma"
## 
## $CD8T
## [1] "CD8T"
## 
## $`Th/Tin`
## [1] "Th/Tin"
## 
## $Treg
## [1] "Treg"
## 
## $NK.1
## [1] "NK.1"
## 
## $NK.2
## [1] "NK.2"
## 
## $ILC2.0
## [1] "ILC2.0"
## 
## $ILC2.1
## [1] "ILC2.1"
## 
## $ILC2.2
## [1] "ILC2.2"
## 
## $BAS
## [1] "BAS"
## 
## $EOS
## [1] "EOS"
## 
## $NEU.1
## [1] "NEU.1"
## 
## $NEU.2
## [1] "NEU.2"
## 
## $NEU.3
## [1] "NEU.3"
## 
## $pDC
## [1] "pDC"
## 
## $cDC1
## [1] "cDC1"
## 
## $cDC2
## [1] "cDC2"
## 
## $migDC
## [1] "migDC"
## 
## $Mono.1
## [1] "Mono.1"
## 
## $Mono.2
## [1] "Mono.2"
## 
## $Mono.3
## [1] "Mono.3"
## 
## $IM.1
## [1] "IM.1"
## 
## $IM.2
## [1] "IM.2"
## 
## $IM.3
## [1] "IM.3"
## 
## $AM.0
## [1] "AM.0"
## 
## $AM.1
## [1] "AM.1"

run

# save DEGs' table
df_test.DEGs_Anno1 <- data.frame()
for(nn in names_new.Anno1){
  df_test.DEGs_Anno1 <- rbind(df_test.DEGs_Anno1, data.frame(test.DEGs_Anno1[[nn]],Anno1=nn))
}

#write.csv(df_test.DEGs_Anno1, "sc10x_RZB.LUNG_DEGs.CTLvsCKO.Anno1.csv")

stat

df_test.DEGs_Anno1$Anno1 <- factor(as.character(df_test.DEGs_Anno1$Anno1),
                                   levels = names_new.Anno1)
cut.padj = 0.05
cut.log2FC = 0.1   
#cut.log2FC = log2(1.5)   
#cut.log2FC = log2(2)  

cut.pct1 = 0.05

stat_test.DEGs_Anno1 <- df_test.DEGs_Anno1 %>% filter(p_val_adj < cut.padj & 
                          abs(avg_log2FC) > cut.log2FC & 
                          pct.1 > cut.pct1) %>%
                      group_by(Anno1,cluster) %>%
                      summarise(up.DEGs = n()) %>% as.data.frame()
stat_test.DEGs_Anno1
##     Anno1  cluster up.DEGs
## 1     All LUNG.CTL     380
## 2     All LUNG.CKO     347
## 3  Plasma LUNG.CKO       1
## 4    CD8T LUNG.CKO       1
## 5  Th/Tin LUNG.CKO       1
## 6    Treg LUNG.CKO       1
## 7    NK.1 LUNG.CKO       1
## 8    NK.2 LUNG.CTL       5
## 9    NK.2 LUNG.CKO       1
## 10 ILC2.0 LUNG.CTL       3
## 11 ILC2.0 LUNG.CKO       6
## 12 ILC2.1 LUNG.CTL       1
## 13 ILC2.1 LUNG.CKO       2
## 14 ILC2.2 LUNG.CTL       1
## 15 ILC2.2 LUNG.CKO       2
## 16  NEU.2 LUNG.CTL       2
## 17  NEU.2 LUNG.CKO       2
## 18    pDC LUNG.CKO       1
## 19   cDC1 LUNG.CTL       1
## 20   cDC2 LUNG.CKO       1
## 21  migDC LUNG.CKO       1
## 22 Mono.1 LUNG.CKO       1
## 23 Mono.2 LUNG.CTL       2
## 24 Mono.2 LUNG.CKO       1
## 25 Mono.3 LUNG.CTL       1
## 26 Mono.3 LUNG.CKO       1
## 27   IM.1 LUNG.CKO       1
## 28   IM.2 LUNG.CKO       1
## 29   IM.3 LUNG.CKO       1
## 30   AM.0 LUNG.CKO       1
## 31   AM.1 LUNG.CTL       2
## 32   AM.1 LUNG.CKO       2
cut.padj = 0.05
cut.log2FC = 0.1   
#cut.log2FC = log2(1.5) 
#cut.log2FC = log2(2)
  
cut.pct1 = 0.05

stat_test.DEGs_Anno1.barplot <- df_test.DEGs_Anno1 %>% filter(p_val_adj < cut.padj & 
                          abs(avg_log2FC) > cut.log2FC & 
                          pct.1 > cut.pct1) %>%
                      group_by(Anno1,cluster) %>%
                      summarise(up.DEGs = n()) %>% as.data.frame() %>%
  ggplot(aes(x=Anno1, y=up.DEGs, color = cluster)) +
  geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
  geom_text(aes(label = up.DEGs),vjust=-0.21, show.legend = F, position = position_dodge(0.75) ) +
  theme_classic(base_size = 15) +
  scale_color_manual(values = color.cnt, name="") +
  labs(title=paste0("up.DEGs stat, pct.1>",cut.pct1,", padj<",cut.padj,", |log2FC|>",cut.log2FC), y = "Count") +
  #labs(title=paste0("up.DEGs stat, pct.1>",cut.pct1,", padj<",cut.padj,", |FC|>1.5"), y = "Count") +
  #labs(title=paste0("up.DEGs stat, pct.1>",cut.pct1,", padj<",cut.padj,", |FC|>2"), y = "Count") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6),
        title =element_text(size=12, face='bold'))



stat_test.DEGs_Anno1.barplot

Anno1 plot

pp_test.DEGs.Anno1 <- lapply(list_new.Anno1, function(x){NA})
#
test.DEGs_Anno1.combine <- test.DEGs_Anno1
test.DEGs_Anno1.combine <- lapply(test.DEGs_Anno1.combine, function(x){
  x[x$cluster=="LUNG.CTL","avg_log2FC"] <- -x[x$cluster=="LUNG.CTL","avg_log2FC"]
  x
})

plot

pp_test.DEGs.Anno1$All <- test.DEGs_Anno1.combine$All %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.35)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="All LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno1$All

those All-DEGs enriched in low FC, log2FC 0.2 should filter out most of them
Hsp- and Rpl29 might should be ignored

Plasma

pp_test.DEGs.Anno1$Plasma <- test.DEGs_Anno1.combine$Plasma %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.35)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="Plasma LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno1$Plasma

NK.2

pp_test.DEGs.Anno1$NK.2 <- test.DEGs_Anno1.combine$NK.2 %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.35)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="NK.2 LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno1$NK.2

ILC2.0

ILC2: only CKO-gene Hilpda significant, no other similar level DEGs
(Fam71f2 should be an artifact caused by CRE-event, which was tested in bulk data)

pp_test.DEGs.Anno1$ILC2.0 <- test.DEGs_Anno1.combine$ILC2.0 %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.15)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="ILC2.0 LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno1$ILC2.0

ILC2.1

pp_test.DEGs.Anno1$ILC2.1 <- test.DEGs_Anno1.combine$ILC2.1 %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.35)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="ILC2.1 LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno1$ILC2.1

ILC2.2

pp_test.DEGs.Anno1$ILC2.2 <- test.DEGs_Anno1.combine$ILC2.2 %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.35)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="ILC2.2 LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno1$ILC2.2

NEU.2

pp_test.DEGs.Anno1$NEU.2 <- test.DEGs_Anno1.combine$NEU.2 %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.15)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="NEU.2 LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno1$NEU.2

pDC

pp_test.DEGs.Anno1$pDC <- test.DEGs_Anno1.combine$pDC %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.35)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="pDC LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno1$pDC

cDC1

pp_test.DEGs.Anno1$cDC1 <- test.DEGs_Anno1.combine$cDC1 %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.35)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="cDC1 LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno1$cDC1

cDC2

pp_test.DEGs.Anno1$cDC2 <- test.DEGs_Anno1.combine$cDC2 %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.35)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="cDC2 LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno1$cDC2

Mono.2

pp_test.DEGs.Anno1$Mono.2 <- test.DEGs_Anno1.combine$Mono.2 %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.15)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="Mono.2 LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno1$Mono.2

Mono.3

pp_test.DEGs.Anno1$Mono.3 <- test.DEGs_Anno1.combine$Mono.3 %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.35)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="Mono.3 LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno1$Mono.3

AM.1

pp_test.DEGs.Anno1$AM.1 <- test.DEGs_Anno1.combine$AM.1 %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.35)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="AM.1 LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno1$AM.1

Anno2

all.Anno2 <- levels(GEX.seur$Anno2)
all.Anno2
##  [1] "Bcell" "Tcell" "NK"    "ILC2"  "BAS"   "EOS"   "NEU"   "pDC"   "cDC"  
## [10] "migDC" "Mono"  "IM"    "AM"
#
list_new.Anno2 <- list()
list_new.Anno2[["All"]] <- all.Anno2

for(NN in all.Anno2){
  list_new.Anno2[[NN]] <- NN
}

names_new.Anno2 <- names(list_new.Anno2)
list_new.Anno2
## $All
##  [1] "Bcell" "Tcell" "NK"    "ILC2"  "BAS"   "EOS"   "NEU"   "pDC"   "cDC"  
## [10] "migDC" "Mono"  "IM"    "AM"   
## 
## $Bcell
## [1] "Bcell"
## 
## $Tcell
## [1] "Tcell"
## 
## $NK
## [1] "NK"
## 
## $ILC2
## [1] "ILC2"
## 
## $BAS
## [1] "BAS"
## 
## $EOS
## [1] "EOS"
## 
## $NEU
## [1] "NEU"
## 
## $pDC
## [1] "pDC"
## 
## $cDC
## [1] "cDC"
## 
## $migDC
## [1] "migDC"
## 
## $Mono
## [1] "Mono"
## 
## $IM
## [1] "IM"
## 
## $AM
## [1] "AM"

run

# save DEGs' table
df_test.DEGs_Anno2 <- data.frame()
for(nn in names_new.Anno2){
  df_test.DEGs_Anno2 <- rbind(df_test.DEGs_Anno2, data.frame(test.DEGs_Anno2[[nn]],Anno2=nn))
}

#write.csv(df_test.DEGs_Anno2, "sc10x_RZB.LUNG_DEGs.CTLvsCKO.Anno2.csv")

stat

df_test.DEGs_Anno2$Anno2 <- factor(as.character(df_test.DEGs_Anno2$Anno2),
                                   levels = names_new.Anno2)
cut.padj = 0.05
cut.log2FC = 0.1   
#cut.log2FC = log2(1.5)  
#cut.log2FC = log2(2) 
  
cut.pct1 = 0.05

stat_test.DEGs_Anno2 <- df_test.DEGs_Anno2 %>% filter(p_val_adj < cut.padj & 
                          abs(avg_log2FC) > cut.log2FC & 
                          pct.1 > cut.pct1) %>%
                      group_by(Anno2,cluster) %>%
                      summarise(up.DEGs = n()) %>% as.data.frame()
stat_test.DEGs_Anno2
##    Anno2  cluster up.DEGs
## 1    All LUNG.CTL     380
## 2    All LUNG.CKO     347
## 3  Bcell LUNG.CKO       1
## 4  Tcell LUNG.CTL       3
## 5  Tcell LUNG.CKO       2
## 6     NK LUNG.CTL       4
## 7     NK LUNG.CKO       1
## 8   ILC2 LUNG.CTL      11
## 9   ILC2 LUNG.CKO      17
## 10   NEU LUNG.CTL      12
## 11   NEU LUNG.CKO       6
## 12   pDC LUNG.CKO       1
## 13   cDC LUNG.CTL       6
## 14   cDC LUNG.CKO       1
## 15 migDC LUNG.CKO       1
## 16  Mono LUNG.CTL       3
## 17  Mono LUNG.CKO       3
## 18    IM LUNG.CTL       2
## 19    IM LUNG.CKO       1
## 20    AM LUNG.CTL       2
## 21    AM LUNG.CKO       2
cut.padj = 0.05
cut.log2FC = 0.1   
#cut.log2FC = log2(1.5)  
#cut.log2FC = log2(2) 

cut.pct1 = 0.05

stat_test.DEGs_Anno2.barplot <- df_test.DEGs_Anno2 %>% filter(p_val_adj < cut.padj & 
                          abs(avg_log2FC) > cut.log2FC & 
                          pct.1 > cut.pct1) %>%
                      group_by(Anno2,cluster) %>%
                      summarise(up.DEGs = n()) %>% as.data.frame() %>%
  ggplot(aes(x=Anno2, y=up.DEGs, color = cluster)) +
  geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
  geom_text(aes(label = up.DEGs),vjust=-0.21, show.legend = F, position = position_dodge(0.75) ) +
  theme_classic(base_size = 15) +
  scale_color_manual(values = color.cnt, name="") +
  labs(title=paste0("up.DEGs stat, pct.1>",cut.pct1,", padj<",cut.padj,", |log2FC|>",cut.log2FC), y = "Count") +
  #labs(title=paste0("up.DEGs stat, pct.1>",cut.pct1,", padj<",cut.padj,", |FC|>1.5"), y = "Count") +
  #labs(title=paste0("up.DEGs stat, pct.1>",cut.pct1,", padj<",cut.padj,", |FC|>2"), y = "Count") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6),
        title =element_text(size=12, face='bold'))



stat_test.DEGs_Anno2.barplot

Anno2 plot

pp_test.DEGs.Anno2 <- lapply(list_new.Anno2, function(x){NA})
#
test.DEGs_Anno2.combine <- test.DEGs_Anno2
test.DEGs_Anno2.combine <- lapply(test.DEGs_Anno2.combine, function(x){
  x[x$cluster=="LUNG.CTL","avg_log2FC"] <- -x[x$cluster=="LUNG.CTL","avg_log2FC"]
  x
})

All

pp_test.DEGs.Anno2$All <- test.DEGs_Anno2.combine$All %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.35)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="All LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno2$All

Bcell

pp_test.DEGs.Anno2$Bcell <- test.DEGs_Anno2.combine$Bcell %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.35)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="Bcell LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno2$Bcell

Tcell

pp_test.DEGs.Anno2$Tcell <- test.DEGs_Anno2.combine$Tcell %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.05)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="Tcell LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno2$Tcell

NK

pp_test.DEGs.Anno2$NK <- test.DEGs_Anno2.combine$NK %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.35)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="NK LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno2$NK

ILC2

pp_test.DEGs.Anno2$ILC2 <- test.DEGs_Anno2.combine$ILC2 %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.05)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="ILC2 LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno2$ILC2

some few ILC2 markers like Il13/Il5/Areg that up-regulated in CKO should be further tested !

NEU

pp_test.DEGs.Anno2$NEU <- test.DEGs_Anno2.combine$NEU %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.15)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="NEU LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno2$NEU

pDC

pp_test.DEGs.Anno2$pDC <- test.DEGs_Anno2.combine$pDC %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.35)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="pDC LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno2$pDC

cDC

pp_test.DEGs.Anno2$cDC <- test.DEGs_Anno2.combine$cDC %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.15)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="cDC LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno2$cDC

migDC

pp_test.DEGs.Anno2$migDC <- test.DEGs_Anno2.combine$migDC %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.35)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="migDC LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno2$migDC

Mono

pp_test.DEGs.Anno2$Mono <- test.DEGs_Anno2.combine$Mono %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.15)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="Mono LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno2$Mono

IM

pp_test.DEGs.Anno2$IM <- test.DEGs_Anno2.combine$IM %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.15)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="IM LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno2$IM

AM

pp_test.DEGs.Anno2$AM <- test.DEGs_Anno2.combine$AM %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC<0) | (p_val_adj < 1e-15 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.35)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="AM LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.Anno2$AM

pp_test.DEGs.Anno2_modILC2 <- test.DEGs_Anno2.combine$ILC2 %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC< -log2(1.5)) | (p_val_adj < 1e-15 & avg_log2FC>log2(1.5)) | (p_val_adj < 0.05 & abs(avg_log2FC) > log2(1.5))) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="ILC2 LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")+
  geom_vline(xintercept = c(-log2(1.5),log2(1.5)), linetype="dotted")
pp_test.DEGs.Anno2_modILC2

some few ILC2 markers like Il13/Il5/Areg that up-regulated in CKO should be further tested !
pp_test.DEGs.Anno2_moddILC2 <- test.DEGs_Anno2.combine$ILC2 %>%
  mutate(label=ifelse(((p_val_adj < 1e-18 & avg_log2FC< -log2(1.5)) | (p_val_adj < 1e-15 & avg_log2FC>log2(1.5)) | (p_val_adj < 0.05 & abs(avg_log2FC) > log2(2))) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"LUNG.CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"LUNG.CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("LUNG.CTL"=color.cnt[1],
                               "LUNG.CKO"=color.cnt[2],
                               "None"="grey")) +
  theme_classic() + labs(title="ILC2 LUNG.CTL vs LUNG.CTL") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")+
  geom_vline(xintercept = c(-log2(2),log2(2)), linetype="dotted")
pp_test.DEGs.Anno2_moddILC2

(FC2 is a quite large value which is not often used for scRNAseq data.)

DEGs violin

ILC2

DEG.list.ILC2 <- list(CTL= (test.DEGs_Anno2$ILC2 %>% filter(cluster == "LUNG.CTL" & p_val_adj<0.05))$gene,
                      CKO= (test.DEGs_Anno2$ILC2 %>% filter(cluster == "LUNG.CKO" & p_val_adj<0.05))$gene)
DEG.list.ILC2
## $CTL
##  [1] "Hilpda"   "Hspa8"    "Hsph1"    "Fkbp5"    "Hspa5"    "Glo1"    
##  [7] "Rif1"     "Xpo1"     "Pclaf"    "Cbx5"     "Hsp90aa1"
## 
## $CKO
##  [1] "Fam71f2"  "Rpl29"    "Cish"     "Il13"     "Sppl2a"   "Pxdc1"   
##  [7] "Pim1"     "Areg"     "Mapkapk3" "Furin"    "Junb"     "Il5"     
## [13] "Cxcr6"    "Nr4a3"    "Hs3st1"   "Il1rl1"   "Kdm6b"
lapply(DEG.list.ILC2, length)
## $CTL
## [1] 11
## 
## $CKO
## [1] 17
pp.vln.ILC2 <- list()
for(nn in c(DEG.list.ILC2$CTL,DEG.list.ILC2$CKO)){
  
  pp.vln.ILC2[[nn]] <- VlnPlot(subset(GEX.seur, subset= Anno2 %in% c("ILC2")), features = nn, group.by = "cnt", cols = color.cnt, pt.size = 0) +
    NoLegend() &
    geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(2),
                               size=2.75
                               )
  
}
#pp.vln.ILC2
cowplot::plot_grid(
  plotlist = pp.vln.ILC2,
  ncol = 6
)

NEU

DEG.list.NEU <- list(CTL= (test.DEGs_Anno2$NEU %>% filter(cluster == "LUNG.CTL" & p_val_adj<0.05))$gene,
                      CKO= (test.DEGs_Anno2$NEU %>% filter(cluster == "LUNG.CKO" & p_val_adj<0.05))$gene)
DEG.list.NEU
## $CTL
##  [1] "Sla"    "Cd33"   "Cxcr2"  "S100a8" "Zyx"    "Gpcpd1" "Gda"    "Rdh12" 
##  [9] "Jaml"   "S100a9" "Zfand5" "Rcsd1" 
## 
## $CKO
## [1] "Mmp9"    "Basp1"   "Gadd45b" "Il1rn"   "Atp6v0c" "Ninj1"
lapply(DEG.list.NEU, length)
## $CTL
## [1] 12
## 
## $CKO
## [1] 6
pp.vln.NEU <- list()
for(nn in c(DEG.list.NEU$CTL,DEG.list.NEU$CKO)){
  
  pp.vln.NEU[[nn]] <- VlnPlot(subset(GEX.seur, subset= Anno2 %in% c("NEU")), features = nn, group.by = "cnt", cols = color.cnt, pt.size = 0) +
    NoLegend() &
    geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(2),
                               size=2.75
                               )
  
}
#pp.vln.NEU
cowplot::plot_grid(
  plotlist = pp.vln.NEU,
  ncol = 5
)

sig.score

bulk DEGs

bulk.table <- read.csv("./bulk.result/Table S1_DEGs.edgeR 1hr_revised.csv")
colnames(bulk.table)[1] <- "Gene.ID"
bulk.table[1:5,]
##   Gene.ID log2FoldChange   logCPM        LR      padj upregulated.condition
## 1    Cish      -7.230326 9.514838 2965.7218  0.00e+00             ILC2 s.n.
## 2   Egln3      -6.278484 8.759880 2127.1264  0.00e+00             ILC2 s.n.
## 3    Tgm1      -5.802098 7.097209  859.6716 1.92e-186             ILC2 s.n.
## 4     Id1      -5.088148 6.936786  715.1301 3.88e-155             ILC2 s.n.
## 5    Xbp1      -4.878188 9.392084 2506.3556  0.00e+00             ILC2 s.n.
dim(bulk.table)
## [1] 653   6
table(bulk.table$upregulated.condition)
## 
## control s.n.    ILC2 s.n. 
##          229          424
table((bulk.table %>% filter(padj<0.001))$upregulated.condition)
## 
## control s.n.    ILC2 s.n. 
##          165          390
(bulk.table %>% filter(log2FoldChange>0))[1:10,]
##     Gene.ID log2FoldChange   logCPM        LR         padj
## 1     Rnpep       1.000851 5.996565  30.75917 2.860000e-07
## 2      Prep       1.002807 4.649743  11.43933 3.573400e-03
## 3  Calcoco1       1.004805 5.026925  15.25237 5.690780e-04
## 4    Osbpl2       1.008297 7.337025  79.89658 8.550000e-18
## 5      Mib2       1.012207 4.779305  12.83879 1.840359e-03
## 6    Pmaip1       1.012392 4.612376  11.31366 3.802935e-03
## 7  Tbc1d10c       1.021033 7.737830 100.67671 2.810000e-22
## 8  Tnfrsf21       1.023414 7.462074  88.95066 9.410000e-20
## 9   Ankrd28       1.023469 4.480079  10.42777 5.778005e-03
## 10  Terf2ip       1.023469 4.479973  10.42777 5.778005e-03
##    upregulated.condition
## 1           control s.n.
## 2           control s.n.
## 3           control s.n.
## 4           control s.n.
## 5           control s.n.
## 6           control s.n.
## 7           control s.n.
## 8           control s.n.
## 9           control s.n.
## 10          control s.n.
range(bulk.table$padj)
## [1] 0.000000000 0.009942589
GEX.seur <- add_geneset_score(GEX.seur, 
                              geneset = (bulk.table %>% filter(upregulated.condition=="control s.n."))$Gene.ID,
                              setname = "CTL.s.n.up")
## Summarizing data
GEX.seur <- add_geneset_score(GEX.seur, 
                              geneset = (bulk.table %>% filter(upregulated.condition=="ILC2 s.n."))$Gene.ID,
                              setname = "ILC2.s.n.up")
## Summarizing data
VlnPlot(GEX.seur, features = "CTL.s.n.up", group.by = "Anno2", split.by = "cnt", cols = color.cnt) + labs(x="All")

VlnPlot(subset(GEX.seur, subset = Anno2 %in% c("NEU")), features = "CTL.s.n.up", group.by = "Anno1", split.by = "cnt", cols = color.cnt) + labs(x="NEU only")

VlnPlot(subset(GEX.seur, subset = Anno2 %in% c("NEU")), features = "ILC2.s.n.up", group.by = "Anno1", split.by = "cnt", cols = color.cnt) + labs(x="NEU only")

pscore.a <- VlnPlot(subset(GEX.seur, subset = Anno2 %in% c("NEU")), features = "ILC2.s.n.up", group.by = "cnt", cols = color.cnt) + labs(x="NEU only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.45),
                               size=2.75
                               )
pscore.a

pscore.a1 <- VlnPlot(subset(GEX.seur, subset = Anno1 %in% c("NEU.1")), features = "ILC2.s.n.up", group.by = "cnt", cols = color.cnt) + labs(x="NEU.1 only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.45),
                               size=2.75
                               )

pscore.a2 <- VlnPlot(subset(GEX.seur, subset = Anno1 %in% c("NEU.2")), features = "ILC2.s.n.up", group.by = "cnt", cols = color.cnt) + labs(x="NEU.2 only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.45),
                               size=2.75
                               )

pscore.a3 <- VlnPlot(subset(GEX.seur, subset = Anno1 %in% c("NEU.3")), features = "ILC2.s.n.up", group.by = "cnt", cols = color.cnt) + labs(x="NEU.3 only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.45),
                               size=2.75
                               )

cowplot::plot_grid(
  pscore.a,
  pscore.a1,
  pscore.a2,
  pscore.a3,
ncol = 4)

pscore.b <- VlnPlot(subset(GEX.seur, subset = Anno2 %in% c("NEU")), features = "CTL.s.n.up", group.by = "cnt", cols = color.cnt) + labs(x="NEU only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.15),
                               size=2.75
                               )
pscore.b

pscore.b1 <- VlnPlot(subset(GEX.seur, subset = Anno1 %in% c("NEU.1")), features = "CTL.s.n.up", group.by = "cnt", cols = color.cnt) + labs(x="NEU.1 only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.10),
                               size=2.75
                               )

pscore.b2 <- VlnPlot(subset(GEX.seur, subset = Anno1 %in% c("NEU.2")), features = "CTL.s.n.up", group.by = "cnt", cols = color.cnt) + labs(x="NEU.2 only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.09),
                               size=2.75
                               )

pscore.b3 <- VlnPlot(subset(GEX.seur, subset = Anno1 %in% c("NEU.3")), features = "CTL.s.n.up", group.by = "cnt", cols = color.cnt) + labs(x="NEU.3 only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.10),
                               size=2.75
                               )

cowplot::plot_grid(
  pscore.b,
  pscore.b1,
  pscore.b2,
  pscore.b3,
ncol = 4)

GEX.seur <- add_geneset_score(GEX.seur, 
                              geneset = (bulk.table %>% filter(upregulated.condition=="control s.n." & padj < 0.001))$Gene.ID,
                              setname = "CTL.s.n.up.p0.001")
## Summarizing data
GEX.seur <- add_geneset_score(GEX.seur, 
                              geneset = (bulk.table %>% filter(upregulated.condition=="ILC2 s.n." & padj < 0.001))$Gene.ID,
                              setname = "ILC2.s.n.up.p0.001")
## Summarizing data
VlnPlot(GEX.seur, features = "CTL.s.n.up.p0.001", group.by = "Anno2", split.by = "cnt", cols = color.cnt) + labs(x="All")

VlnPlot(subset(GEX.seur, subset = Anno2 %in% c("NEU")), features = "CTL.s.n.up.p0.001", group.by = "Anno1", split.by = "cnt", cols = color.cnt) + 
  labs(x="NEU only")

VlnPlot(subset(GEX.seur, subset = Anno2 %in% c("NEU")), features = "ILC2.s.n.up.p0.001", group.by = "Anno1", split.by = "cnt", cols = color.cnt) + 
  labs(x="NEU only")

pscore.aa <- VlnPlot(subset(GEX.seur, subset = Anno2 %in% c("NEU")), features = "ILC2.s.n.up.p0.001", group.by = "cnt", cols = color.cnt) + 
  labs(x="NEU only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.45),
                               size=2.75
                               )
pscore.aa

pscore.aa1 <- VlnPlot(subset(GEX.seur, subset = Anno1 %in% c("NEU.1")), features = "ILC2.s.n.up.p0.001", group.by = "cnt", cols = color.cnt) + 
  labs(x="NEU.1 only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.45),
                               size=2.75
                               )

pscore.aa2 <- VlnPlot(subset(GEX.seur, subset = Anno1 %in% c("NEU.2")), features = "ILC2.s.n.up.p0.001", group.by = "cnt", cols = color.cnt) + 
  labs(x="NEU.2 only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.45),
                               size=2.75
                               )

pscore.aa3 <- VlnPlot(subset(GEX.seur, subset = Anno1 %in% c("NEU.3")), features = "ILC2.s.n.up.p0.001", group.by = "cnt", cols = color.cnt) + 
  labs(x="NEU.3 only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.45),
                               size=2.75
                               )

cowplot::plot_grid(
  pscore.aa,
  pscore.aa1,
  pscore.aa2,
  pscore.aa3,
ncol = 4)

pscore.bb <- VlnPlot(subset(GEX.seur, subset = Anno2 %in% c("NEU")), features = "CTL.s.n.up.p0.001", group.by = "cnt", cols = color.cnt) + 
  labs(x="NEU only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.15),
                               size=2.75
                               )
pscore.bb

pscore.bb1 <- VlnPlot(subset(GEX.seur, subset = Anno1 %in% c("NEU.1")), features = "CTL.s.n.up.p0.001", group.by = "cnt", cols = color.cnt) + 
  labs(x="NEU.1 only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.10),
                               size=2.75
                               )

pscore.bb2 <- VlnPlot(subset(GEX.seur, subset = Anno1 %in% c("NEU.2")), features = "CTL.s.n.up.p0.001", group.by = "cnt", cols = color.cnt) + 
  labs(x="NEU.2 only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.09),
                               size=2.75
                               )

pscore.bb3 <- VlnPlot(subset(GEX.seur, subset = Anno1 %in% c("NEU.3")), features = "CTL.s.n.up.p0.001", group.by = "cnt", cols = color.cnt) + 
  labs(x="NEU.3 only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.10),
                               size=2.75
                               )

cowplot::plot_grid(
  pscore.bb,
  pscore.bb1,
  pscore.bb2,
  pscore.bb3,
ncol = 4)

cycling

CC_gene <- Hmisc::capitalize(tolower(as.vector(unlist(cc.genes.updated.2019))))
CC_gene
##  [1] "Mcm5"     "Pcna"     "Tyms"     "Fen1"     "Mcm7"     "Mcm4"    
##  [7] "Rrm1"     "Ung"      "Gins2"    "Mcm6"     "Cdca7"    "Dtl"     
## [13] "Prim1"    "Uhrf1"    "Cenpu"    "Hells"    "Rfc2"     "Polr1b"  
## [19] "Nasp"     "Rad51ap1" "Gmnn"     "Wdr76"    "Slbp"     "Ccne2"   
## [25] "Ubr7"     "Pold3"    "Msh2"     "Atad2"    "Rad51"    "Rrm2"    
## [31] "Cdc45"    "Cdc6"     "Exo1"     "Tipin"    "Dscc1"    "Blm"     
## [37] "Casp8ap2" "Usp1"     "Clspn"    "Pola1"    "Chaf1b"   "Mrpl36"  
## [43] "E2f8"     "Hmgb2"    "Cdk1"     "Nusap1"   "Ube2c"    "Birc5"   
## [49] "Tpx2"     "Top2a"    "Ndc80"    "Cks2"     "Nuf2"     "Cks1b"   
## [55] "Mki67"    "Tmpo"     "Cenpf"    "Tacc3"    "Pimreg"   "Smc4"    
## [61] "Ccnb2"    "Ckap2l"   "Ckap2"    "Aurkb"    "Bub1"     "Kif11"   
## [67] "Anp32e"   "Tubb4b"   "Gtse1"    "Kif20b"   "Hjurp"    "Cdca3"   
## [73] "Jpt1"     "Cdc20"    "Ttk"      "Cdc25c"   "Kif2c"    "Rangap1" 
## [79] "Ncapd2"   "Dlgap5"   "Cdca2"    "Cdca8"    "Ect2"     "Kif23"   
## [85] "Hmmr"     "Aurka"    "Psrc1"    "Anln"     "Lbr"      "Ckap5"   
## [91] "Cenpe"    "Ctcf"     "Nek2"     "G2e3"     "Gas2l3"   "Cbx5"    
## [97] "Cenpa"
GEX.seur <- add_geneset_score(GEX.seur, 
                              geneset = CC_gene,
                              setname = "score.Cycling")
## Summarizing data
VlnPlot(GEX.seur, features = "score.Cycling", group.by = "Anno2", split.by = "cnt", cols = color.cnt) + labs(x="All")

VlnPlot(GEX.seur, features = "score.Cycling", group.by = "Anno1", split.by = "cnt", cols = color.cnt) + labs(x="All")

pscore.CC.Anno2 <- GEX.seur@meta.data %>%
  ggplot(aes(x=cnt2, y= score.Cycling, fill = cnt)) +
  geom_violin(trim = TRUE, scale = "width", lwd=0.1) +
  #geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) &
  labs(x="", y="", title = "score.Cycling") + NoLegend() +
  scale_fill_manual(values = color.cnt) +
    theme(axis.title.x = element_blank(),
          #axis.text.x = element_blank(),
          axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 12.1),
          #axis.ticks.x = element_blank(),
          axis.line = element_line(color = "black", size = 0.1),
          panel.grid = element_blank(),
          panel.border = element_blank(),
          panel.background = element_blank(),
          plot.margin = unit(c(0.03,0.1,0,0.1),"cm")) + coord_cartesian(ylim=c(-0.15,0.75)) + geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1) +
    stat_summary(fun.y=mean, geom="point", shape=23, size=0.45, color="black", fill="white", alpha=0.75) +
    stat_compare_means(aes(label= ..p.signif..),
                       method = "wilcox.test",
                       comparisons = list.cnt2,
                       label.y = c(rep(0.65,13)),
                       size = 3.5)
pscore.CC.Anno2

pscore.CC <- list()
pscore.CC[["ILC2"]] <- VlnPlot(subset(GEX.seur, subset = Anno2 %in% c("ILC2")), features = "score.Cycling", group.by = "cnt", cols = color.cnt) + 
  labs(x="ILC2 only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.65),
                               size=2.75
                               )
pscore.CC[["ILC2"]]

pscore.CC[["NEU"]] <- VlnPlot(subset(GEX.seur, subset = Anno2 %in% c("NEU")), features = "score.Cycling", group.by = "cnt", cols = color.cnt) + 
  labs(x="NEU only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.15),
                               size=2.75
                               )
pscore.CC[["NEU"]]

pscore.CC[["cDC"]] <- VlnPlot(subset(GEX.seur, subset = Anno2 %in% c("cDC")), features = "score.Cycling", group.by = "cnt", cols = color.cnt) + 
  labs(x="cDC only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.4),
                               size=2.75
                               )
pscore.CC[["cDC"]]

pscore.CC[["cDC1"]] <- VlnPlot(subset(GEX.seur, subset = Anno1 %in% c("cDC1")), features = "score.Cycling", group.by = "cnt", cols = color.cnt) + 
  labs(x="cDC1 only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.4),
                               size=2.75
                               )
pscore.CC[["cDC1"]]

pscore.CC[["cDC2"]] <- VlnPlot(subset(GEX.seur, subset = Anno1 %in% c("cDC2")), features = "score.Cycling", group.by = "cnt", cols = color.cnt) + 
  labs(x="cDC2 only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.3),
                               size=2.75
                               )
pscore.CC[["cDC2"]]

pscore.CC[["migDC"]] <- VlnPlot(subset(GEX.seur, subset = Anno2 %in% c("migDC")), features = "score.Cycling", group.by = "cnt", cols = color.cnt) + 
  labs(x="migDC only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.15),
                               size=2.75
                               )
pscore.CC[["migDC"]]

cowplot::plot_grid(
  plotlist = pscore.CC,
  ncol = 3
)
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Removed 2 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).

lipid pathways

# old code previous code see I:\Shared_win\projects\202309_HilpdaILC2\pathway
go.list <- list(GO0016042=as.vector(unlist(read.table("./figure20240407/1.LUNG_only/sig.score/pathways/GOlist/GO0016042.txt"))),
                GO0006635=as.vector(unlist(read.table("./figure20240407/1.LUNG_only/sig.score/pathways/GOlist/GO0006635.txt"))),
                GO0034440=as.vector(unlist(read.table("./figure20240407/1.LUNG_only/sig.score/pathways/GOlist/GO0034440.txt"))),
                GO0019395=as.vector(unlist(read.table("./figure20240407/1.LUNG_only/sig.score/pathways/GOlist/GO0019395.txt")))
                )
lapply(go.list,head)
## $GO0016042
## [1] "Aoah"   "Hexb"   "Prkaa1" "Naga"   "Asah1"  "Aspg"  
## 
## $GO0006635
## [1] "Echdc1" "Etfdh"  "Fabp1"  "Mtor"   "Acad9"  "Eci1"  
## 
## $GO0034440
## [1] "Prkaa1"  "C1qtnf9" "Cyp4v3"  "Echdc1"  "Etfdh"   "Fabp1"  
## 
## $GO0019395
## [1] "Prkaa1"  "C1qtnf9" "Cyp4v3"  "Echdc1"  "Etfdh"   "Fabp1"
lapply(go.list, length)
## $GO0016042
## [1] 358
## 
## $GO0006635
## [1] 82
## 
## $GO0034440
## [1] 130
## 
## $GO0019395
## [1] 122
library(UpSetR)

upset(fromList(go.list),
      order.by = 'freq', nsets = 6, point.size=3.5, line.size =1, text.scale = 2)
  
grid::grid.text("Overlaps",x = 0.65, y=0.95, gp=grid::gpar(fontsize=20))

SS2

matt_pc <- read.csv("../SS2/analysis0915/RNAseq.SS2_RZB.20210707.filt_tpm.pc_gene.csv", row.names = 1)
head(matt_pc)
##               CTL1.0707 CTL2.0707 CTL3.0707 CTL4.0707 CTL5.0707 CTL6.0707
## 0610009B22Rik        14        17        11        19        10        13
## 0610010F05Rik         7         4         4         7         6         8
## 0610010K14Rik        82        88        88        73        80        68
## 0610012G03Rik        30        28        29        32        27        27
## 0610030E20Rik        10         8         8        12        11        10
## 1110004F10Rik       152       163       165       152       165       153
##               CTL7.0707 CKO1.0707 CKO2.0707 CKO3.0707 CKO5.0707 CKO6.0707
## 0610009B22Rik        16        17        23        15        17         9
## 0610010F05Rik         6         5         8         5         3         9
## 0610010K14Rik        77        77        76        78        63        76
## 0610012G03Rik        29        28        32        33        33        28
## 0610030E20Rik         9         7        11         6        11        11
## 1110004F10Rik       166       186       169       174       145       160
##               CKO7.0707 CKO8.0707
## 0610009B22Rik        20        12
## 0610010F05Rik         6         6
## 0610010K14Rik       100        85
## 0610012G03Rik        27        25
## 0610030E20Rik        10         9
## 1110004F10Rik       197       175
dim(matt_pc)
## [1] 9298   14
name.list <- lapply(names(go.list),function(x){
  paste0("OE.",x)
})
names(name.list) <- names(go.list)

name.list
## $GO0016042
## [1] "OE.GO0016042"
## 
## $GO0006635
## [1] "OE.GO0006635"
## 
## $GO0034440
## [1] "OE.GO0034440"
## 
## $GO0019395
## [1] "OE.GO0019395"
overall expression
source("I:/Shared_win/projects/RNA_normal/analysis.r")
OE_result1 <- easy_OE(mat_e = matt_pc,
                      cells_s = colnames(matt_pc),
                      path_n = name.list,
                      gene_sign = go.list)

OE_result1$OE
##           GO0016042 GO0006635 GO0034440 GO0019395
## CTL1.0707    -0.134    -0.303    -0.355    -0.330
## CTL2.0707    -0.068     0.048    -0.071    -0.043
## CTL3.0707     0.091     0.022     0.184     0.153
## CTL4.0707    -0.034    -0.152    -0.066    -0.074
## CTL5.0707    -0.244    -0.404    -0.288    -0.293
## CTL6.0707    -0.031     0.156     0.101     0.112
## CTL7.0707    -0.184    -0.178    -0.211    -0.223
## CKO1.0707    -0.182    -0.254    -0.283    -0.333
## CKO2.0707     0.089     0.098     0.050     0.053
## CKO3.0707    -0.247    -0.137    -0.168    -0.180
## CKO5.0707     0.594     0.548     0.635     0.663
## CKO6.0707     0.062     0.048     0.180     0.166
## CKO7.0707     0.316     0.491     0.358     0.409
## CKO8.0707    -0.026     0.018    -0.067    -0.080
#write.csv(OE_result1$OE, "figure20240407/1.LUNG_only/sig.score/pathways/OverallExpression.GOlist.SS2_20210707.csv")
design.cnt <- factor(c(rep("CTL",7),
                       rep("CKO",7)),
                     levels = c("CTL","CKO"))
design.cnt
##  [1] CTL CTL CTL CTL CTL CTL CTL CKO CKO CKO CKO CKO CKO CKO
## Levels: CTL CKO
OE_melt1 <- reshape2::melt(OE_result1$OE)
OE_melt1$condition <- rep(design.cnt,4)
OE_melt1[1:10,]
##         Var1      Var2  value condition
## 1  CTL1.0707 GO0016042 -0.134       CTL
## 2  CTL2.0707 GO0016042 -0.068       CTL
## 3  CTL3.0707 GO0016042  0.091       CTL
## 4  CTL4.0707 GO0016042 -0.034       CTL
## 5  CTL5.0707 GO0016042 -0.244       CTL
## 6  CTL6.0707 GO0016042 -0.031       CTL
## 7  CTL7.0707 GO0016042 -0.184       CTL
## 8  CKO1.0707 GO0016042 -0.182       CKO
## 9  CKO2.0707 GO0016042  0.089       CKO
## 10 CKO3.0707 GO0016042 -0.247       CKO
gg.OE1 <- ggplot(OE_melt1, aes(x = condition, y=value,color=condition)) +
  #geom_boxplot() +
    #ylim(c(-0.25,0.35)) +
  geom_boxplot(outlier.size = 0, outlier.alpha = 0)+
    geom_jitter(width = 0.12) +
  #facet_grid(rows = "Var2") + 
  facet_wrap(~Var2,ncol = 2)+
    #stat_summary(fun.y=mean, geom="point", shape=18, size=3, color="black") +
    ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "t.test",
                               comparisons = list(c("CTL","CKO")),
                               label.y = c(0.65,0.65),
                               size=2.1) +
      scale_color_manual(values = color.cnt[1:2]) +
    labs(x="",y="Overall Expression of GO.list", title = " ") +
    theme(panel.grid.major = element_blank(), 
          panel.grid.minor = element_blank(), 
          panel.background = element_blank(), 
          panel.border = element_rect(fill = NA)) + coord_cartesian(ylim=(c(-0.6,0.8))) + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))
gg.OE1

single

for(nn in names(go.list)){
  
  GEX.seur <- add_geneset_score(obj = GEX.seur,
                                Assay = "RNA",
                                geneset = go.list[[nn]],
                                setname = paste0("score.",nn))
  
}
## Summarizing data 
## Summarizing data 
## Summarizing data 
## Summarizing data
pp.Anno2 <- VlnPlot(GEX.seur, features = paste0("score.",names(go.list)), 
                    group.by = "Anno2", cols = color.A2, ncol = 2, pt.size = 0) +
    
  NoLegend() &
  geom_jitter(alpha=0.15, shape=16, width = 0.2, size = 0.15) & 
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & labs(x="")
pp.Anno2

pp.Anno2_cnt <- VlnPlot(GEX.seur, features = paste0("score.",names(go.list)), 
                        group.by = "cnt2", cols = rep(color.cnt,13), ncol = 2, pt.size = 0) +
    
  NoLegend() &
  geom_jitter(alpha=0.15, shape=16, width = 0.2, size = 0.15) & 
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & labs(x="") &
    stat_compare_means(aes(label= ..p.signif..),
                       method = "wilcox.test",
                       comparisons = list.cnt2,
                       label.y = c(rep(0.135,13)),
                       size = 3.5)
pp.Anno2_cnt

pp.ILC2_cnt <- VlnPlot(subset(GEX.seur, subset=Anno2 %in% c("ILC2")), 
                       features = paste0("score.",names(go.list)), group.by = "cnt", cols = color.cnt, ncol = 2, pt.size = 0) +
    
  NoLegend() &
  geom_jitter(alpha=0.35, shape=16, width = 0.2, size = 0.15) & 
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) #& coord_cartesian(ylim = c(-0.13,0.13)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG_CTL","LUNG_CKO")),
                               label.y = c(0.05),
                               size=2.75
                               )
## geom_signif: na.rm = FALSE, extend_line = 0, parse = FALSE, orientation = NA
## stat_signif: na.rm = FALSE, comparisons = list(c("LUNG_CTL", "LUNG_CKO")), test = wilcox.test, test.args = list(paired = FALSE), annotations = NULL, map_signif_level = FALSE, y_position = 0.05, xmin = NULL, xmax = NULL, margin_top = 0.05, step_increase = 0, tip_length = 0.03, manual = FALSE, orientation = NA
## position_identity
pp.ILC2_cnt

names(go.list)
## [1] "GO0016042" "GO0006635" "GO0034440" "GO0019395"
pscore.GO <- list()
pscore.GO[["GO0016042"]] <- VlnPlot(subset(GEX.seur, subset = Anno2 %in% c("ILC2")), 
                                    features = "score.GO0016042", group.by = "cnt", cols = color.cnt) + 
  labs(x="ILC2 only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.035),
                               size=2.75
                               )
pscore.GO[["GO0016042"]]

pscore.GO[["GO0006635"]] <- VlnPlot(subset(GEX.seur, subset = Anno2 %in% c("ILC2")), 
                                    features = "score.GO0006635", group.by = "cnt", cols = color.cnt) + 
  labs(x="ILC2 only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.125),
                               size=2.75
                               )
pscore.GO[["GO0006635"]]

pscore.GO[["GO0034440"]] <- VlnPlot(subset(GEX.seur, subset = Anno2 %in% c("ILC2")), 
                                    features = "score.GO0034440", group.by = "cnt", cols = color.cnt) + 
  labs(x="ILC2 only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.095),
                               size=2.75
                               )
pscore.GO[["GO0034440"]]

pscore.GO[["GO0019395"]] <- VlnPlot(subset(GEX.seur, subset = Anno2 %in% c("ILC2")), 
                                    features = "score.GO0019395", group.by = "cnt", cols = color.cnt) + 
  labs(x="ILC2 only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(0.1),
                               size=2.75
                               )
pscore.GO[["GO0019395"]]

cowplot::plot_grid(
  plotlist = pscore.GO,
  ncol = 2
)
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).

new plot

new plot of old figures

# previous code see J:\projects_local\projects\20220506_10x_RZB\check_0810
#     check   Tlr2/Tlr4/Ager/Cxcr2/Cxcl2    All/Ctrl Lung,   Violin
check.m <- c("Mmp9","Tlr2","Tlr4","Ager",
                   "Cxcr2","Cxcl2","Zyx","Cd33")
#GEX.seur@meta.data[,check.m ] <- t(GEX.seur@assays$RNA@data[check.m,])
#GEX.seur@meta.data[,check.m ] <- NULL
ppm.NEU <- list()
ppm.NEU[["Mmp9"]] <- VlnPlot(subset(GEX.seur, subset = Anno2 %in% c("NEU")), features = "Mmp9", group.by = "cnt", cols = color.cnt) + labs(x="NEU only") + NoLegend() +
  #geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(4.235),
                               size=2.75
                               )
ppm.NEU[["Mmp9"]]

ppm.NEU[["Tlr2"]] <- VlnPlot(subset(GEX.seur, subset = Anno2 %in% c("NEU")), features = "Tlr2", group.by = "cnt", cols = color.cnt) + labs(x="NEU only") + NoLegend() +
  #geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(4.135),
                               size=2.75
                               )
ppm.NEU[["Tlr2"]]

ppm.NEU[["Tlr4"]] <- VlnPlot(subset(GEX.seur, subset = Anno2 %in% c("NEU")), features = "Tlr4", group.by = "cnt", cols = color.cnt) + labs(x="NEU only") + NoLegend() +
  #geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(3.235),
                               size=2.75
                               )
ppm.NEU[["Tlr4"]]

ppm.NEU[["Ager"]] <- VlnPlot(subset(GEX.seur, subset = Anno2 %in% c("NEU")), features = "Ager", group.by = "cnt", cols = color.cnt) + labs(x="NEU only") + NoLegend() +
  #geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(2.235),
                               size=2.75
                               )
ppm.NEU[["Ager"]]

ppm.NEU[["Cxcr2"]] <- VlnPlot(subset(GEX.seur, subset = Anno2 %in% c("NEU")), features = "Cxcr2", group.by = "cnt", cols = color.cnt) + labs(x="NEU only") + NoLegend() +
  #geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(4.235),
                               size=2.75
                               )
ppm.NEU[["Cxcr2"]]

ppm.NEU[["Cxcl2"]] <- VlnPlot(subset(GEX.seur, subset = Anno2 %in% c("NEU")), features = "Cxcl2", group.by = "cnt", cols = color.cnt) + labs(x="NEU only") + NoLegend() +
  #geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(6.235),
                               size=2.75
                               )
ppm.NEU[["Cxcl2"]]

ppm.NEU[["Zyx"]] <- VlnPlot(subset(GEX.seur, subset = Anno2 %in% c("NEU")), features = "Zyx", group.by = "cnt", cols = color.cnt) + labs(x="NEU only") + NoLegend() +
  geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(4.035),
                               size=2.75
                               )
ppm.NEU[["Zyx"]]

ppm.NEU[["Cd33"]] <- VlnPlot(subset(GEX.seur, subset = Anno2 %in% c("NEU")), features = "Cd33", group.by = "cnt", cols = color.cnt) + labs(x="NEU only") + NoLegend() +
  #geom_jitter(alpha=0.1, shape=16, width = 0.2, size = 0.15) & #labs(title = "") &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
  stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55) & #coord_cartesian(ylim = c(0,5)) &  
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("LUNG.CTL","LUNG.CKO")),
                               label.y = c(4.235),
                               size=2.75
                               )
ppm.NEU[["Cd33"]]

cowplot::plot_grid(
  plotlist = ppm.NEU,
  ncol = 4
)
## Warning: Removed 1 rows containing missing values (geom_point).

ILC2.1vs.2 DEG0705

#GEX.seur <- readRDS("./sc10x_RZB.LUNG_Anno.rds")
GEX.seur
## An object of class Seurat 
## 18826 features across 10800 samples within 1 assay 
## Active assay: RNA (18826 features, 1267 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
DimPlot(subset(GEX.seur, subset=Anno2 %in% c("ILC2") & UMAP_2 >8 ), reduction = "umap", label = T, group.by = "Anno1") +
  DimPlot(subset(GEX.seur, subset=Anno2 %in% c("ILC2")  & UMAP_2 >8 ), reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)

table(GEX.seur@meta.data[,c("Anno1","cnt")])
##         cnt
## Anno1    LUNG.CTL LUNG.CKO
##   Bcell        62       73
##   Plasma       50       66
##   CD8T        106      327
##   Th/Tin      121      259
##   Treg         68       82
##   NK.1        221      206
##   NK.2        735      646
##   ILC2.0      448      203
##   ILC2.1      190      128
##   ILC2.2      119      102
##   BAS          23       19
##   EOS         114      125
##   NEU.1       381      397
##   NEU.2       567      692
##   NEU.3       328      578
##   pDC         126      137
##   cDC1         49       34
##   cDC2         75       72
##   migDC        86       67
##   Mono.1      142      207
##   Mono.2      251      303
##   Mono.3       76      121
##   IM.1        197      132
##   IM.2        206      235
##   IM.3         98       97
##   AM.0         78       47
##   AM.1        309      219

subset

idx.sub <- rownames(GEX.seur@meta.data)[GEX.seur@meta.data$Anno1 %in% c("ILC2.1","ILC2.2")  &
                                        GEX.seur@meta.data$UMAP_2 > 8 &
                                        GEX.seur@assays[['RNA']]@data["Mki67",] <0.5]

idx.sub.CTL <- rownames(GEX.seur@meta.data)[GEX.seur@meta.data$Anno1 %in% c("ILC2.1","ILC2.2")  &
                                            GEX.seur@meta.data$UMAP_2 > 8 &
                                            GEX.seur@meta.data$cnt %in% c("LUNG.CTL") &
                                            GEX.seur@assays[['RNA']]@data["Mki67",] <0.5]   # exclude most cycling cells in ILC2.1/2
GEX.sub <- subset(GEX.seur, cells= idx.sub.CTL)
GEX.sub
## An object of class Seurat 
## 18826 features across 226 samples within 1 assay 
## Active assay: RNA (18826 features, 1267 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
table(GEX.sub@meta.data[,c("Anno1","cnt")])
##         cnt
## Anno1    LUNG.CTL LUNG.CKO
##   Bcell         0        0
##   Plasma        0        0
##   CD8T          0        0
##   Th/Tin        0        0
##   Treg          0        0
##   NK.1          0        0
##   NK.2          0        0
##   ILC2.0        0        0
##   ILC2.1      117        0
##   ILC2.2      109        0
##   BAS           0        0
##   EOS           0        0
##   NEU.1         0        0
##   NEU.2         0        0
##   NEU.3         0        0
##   pDC           0        0
##   cDC1          0        0
##   cDC2          0        0
##   migDC         0        0
##   Mono.1        0        0
##   Mono.2        0        0
##   Mono.3        0        0
##   IM.1          0        0
##   IM.2          0        0
##   IM.3          0        0
##   AM.0          0        0
##   AM.1          0        0
color.A1
##       Bcell      Plasma        CD8T      Th/Tin        Treg        NK.1 
## "#CE3D32FF" "#FF1463FF" "#749B58FF" "#E4AF69FF" "#99CC00FF" "#BA6338FF" 
##        NK.2      ILC2.0      ILC2.1      ILC2.2         BAS         EOS 
## "#D58F5CFF" "#5DB1DDFF" "#5050FFFF" "#466983FF" "#802268FF" "#D595A7FF" 
##       NEU.1       NEU.2       NEU.3         pDC        cDC1        cDC2 
## "#7A65A5FF" "#837B8DFF" "#A9A9A9FF" "#5A655EFF" "#CDDEB7FF" "#6BD76BFF" 
##       migDC      Mono.1      Mono.2      Mono.3        IM.1        IM.2 
## "#660099FF" "#996600FF" "#CC9900FF" "#009966FF" "#F0E685FF" "#FFC20AFF" 
##        IM.3        AM.0        AM.1 
## "#E7C76FFF" "#008099FF" "#00CC99FF"
color.sub <- color.A1[c("ILC2.1","ILC2.2")]
color.sub
##      ILC2.1      ILC2.2 
## "#5050FFFF" "#466983FF"
umap.sub <- DimPlot(GEX.sub, reduction = "umap", label = T, group.by = "Anno1", cols = color.sub) +
  DimPlot(GEX.sub, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)
umap.sub

check1.sub <- FeaturePlot(GEX.sub, features = c("Il1rl1","Il13","Stab1","Stab2"), ncol = 4, cols = c("lightgrey","red"), pt.size = 0.5)
check1.sub

check2.sub <- VlnPlot(GEX.sub, features = c("Il1rl1","Il13","Stab1","Stab2"), ncol = 4, group.by = "Anno1", cols = color.sub) &
  geom_boxplot(outlier.size = 0, fill="white", width=0.3, size=0.1, alpha=0.55) & labs(x= "LUNG.CTL") &
  stat_summary(fun=mean, geom="point", shape=18, size=2, color="black", alpha=0.55) &
  coord_cartesian(ylim=c(0,4)) &
  ggpubr::stat_compare_means(aes(lable = ..p.signif..), 
                               method = "wilcox.test",
                               comparisons = list(c("ILC2.1","ILC2.2")),
                               label.y = c(2.2),
                               size=2.5
                               )
check2.sub

ILC2.1 vs ILC2.2

# find markers for every cluster compared to all remaining cells, report only the positive ones
Idents(GEX.sub) <- "Anno1"

#GEX.markers.sub <- FindAllMarkers(GEX.sub, only.pos = TRUE, min.pct = 0.05,
#                                  test.use = "MAST",
#                                  logfc.threshold = 0.1)
GEX.markers.sub <- read.table("sc10x_RZB.DEGs.LUNG.CTL_ILC2.1vs2.20240705.csv", header = TRUE, sep = ",")
#GEX.markers.sub %>% group_by(cluster) %>% top_n(n = 8, wt = avg_log2FC)

stat

cut.padj = 0.05
cut.log2FC = 0.1   
#cut.log2FC = log2(1.5)   
#cut.log2FC = log2(2)  

cut.pct1 = 0.05

stat_sub.DEGs_Anno1 <- GEX.markers.sub %>% filter(p_val_adj < cut.padj & 
                          abs(avg_log2FC) > cut.log2FC & 
                          pct.1 > cut.pct1) %>%
                      group_by(cluster) %>%
                      summarise(up.DEGs = n()) %>% as.data.frame()
stat_sub.DEGs_Anno1
##   cluster up.DEGs
## 1  ILC2.1      52
## 2  ILC2.2      45
stat_sub.DEGs_Anno1.barplot <- GEX.markers.sub %>% filter(p_val_adj < cut.padj & 
                          abs(avg_log2FC) > cut.log2FC & 
                          pct.1 > cut.pct1) %>%
                      group_by(cluster) %>%
                      summarise(up.DEGs = n()) %>% as.data.frame() %>%
  ggplot(aes(x=cluster, y=up.DEGs, color = cluster)) +
  geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
  geom_text(aes(label = up.DEGs),vjust=-0.21, show.legend = F, position = position_dodge(0.75) ) +
  theme_classic(base_size = 15) +
  scale_color_manual(values = color.sub, name="") +
  labs(title=paste0("up.DEGs stat, pct.1>",cut.pct1,", padj<",cut.padj,", |log2FC|>",cut.log2FC), y = "Count",
       x = "LUNG.CTL") +
  #labs(title=paste0("up.DEGs stat, pct.1>",cut.pct1,", padj<",cut.padj,", |FC|>1.5"), y = "Count") +
  #labs(title=paste0("up.DEGs stat, pct.1>",cut.pct1,", padj<",cut.padj,", |FC|>2"), y = "Count") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6),
        title =element_text(size=8, face='bold'))



stat_sub.DEGs_Anno1.barplot

DEG plot

#
test.sub.combine <- GEX.markers.sub
test.sub.combine[test.sub.combine$cluster=="ILC2.1","avg_log2FC"] <- -test.sub.combine[test.sub.combine$cluster=="ILC2.1","avg_log2FC"]
pp_sub.DEGs.Anno1 <- test.sub.combine %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.15)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^mt",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"ILC2.1",ifelse(p_val_adj<0.05 & avg_log2FC>0,"ILC2.2","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=3.25, max.overlaps = 500) +
  scale_fill_manual(values = c("ILC2.1"=as.vector(color.sub[1]),
                               "ILC2.2"=as.vector(color.sub[2]),
                               "None"="grey")) +
  theme_classic() + labs(title="LUNG.CTL ILC2.1 vs ILC2.2") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_sub.DEGs.Anno1

sub.ILC2.1 <- (GEX.markers.sub %>% filter(p_val_adj < cut.padj & 
                          abs(avg_log2FC) > cut.log2FC & 
                          pct.1 > cut.pct1 & cluster %in% c("ILC2.1")))$gene
  
sub.ILC2.2 <- (GEX.markers.sub %>% filter(p_val_adj < cut.padj & 
                          abs(avg_log2FC) > cut.log2FC & 
                          pct.1 > cut.pct1 & cluster %in% c("ILC2.2")))$gene

DotPlot(subset(GEX.seur, subset = cnt %in% c("LUNG.CTL")), features = rev(sub.ILC2.1), group.by = "Anno1")  + coord_flip() + 
    theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6)) + labs(x = "LUNG.CTL")

DotPlot(subset(GEX.seur, subset = cnt %in% c("LUNG.CTL")), features = rev(sub.ILC2.2), group.by = "Anno1")  + coord_flip() + 
    theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6)) + labs(x = "LUNG.CTL")

save obj for GEO

#GEX.seur <- readRDS("./sc10x_RZB.LUNG_Anno.rds")
GEX.seur
## An object of class Seurat 
## 18826 features across 10800 samples within 1 assay 
## Active assay: RNA (18826 features, 1267 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
pumap2 <-DimPlot(GEX.seur, reduction = "umap", label = T, repel = T, label.size = 2.5, group.by = "Anno1", cols = color.A1) + 
  DimPlot(GEX.seur, reduction = "umap", label = T, repel = T, label.size = 3.75, group.by = "Anno2", cols = color.A2)
pumap2

GEX.seur@meta.data[,grep("Doublet|up|cnt1|cnt2",colnames(GEX.seur@meta.data),value = T)] <- NULL
head(GEX.seur@meta.data)
##                    orig.ident nCount_RNA nFeature_RNA percent.mt percent.rb
## AAACCCAAGGTAAGGA-1   Lung_GEX       1339          661  0.2240478   3.286034
## AAACCCACAAACGTGG-1   Lung_GEX       1838         1077  3.1011970  13.003264
## AAACCCACAACACAAA-1   Lung_GEX       1362          745  0.3671072   2.863436
## AAACCCACAAGCTGCC-1   Lung_GEX       1415          682  0.4946996   2.756184
## AAACCCACACAAACGG-1   Lung_GEX       1312          649  0.3810976   1.905488
## AAACCCACAGCAAGAC-1   Lung_GEX        744          465  0.4032258   5.913978
##                      FB.info      S.Score   G2M.Score Phase seurat_clusters
## AAACCCAAGGTAAGGA-1 LUNG.CTL2 -0.058333333 -0.03422477    G1               2
## AAACCCACAAACGTGG-1 LUNG.CKO2 -0.017095792 -0.04855142    G1               0
## AAACCCACAACACAAA-1 LUNG.CTL2 -0.055357143 -0.08558845    G1               6
## AAACCCACAAGCTGCC-1 LUNG.CKO1 -0.059523810 -0.07794758    G1               6
## AAACCCACACAAACGG-1 LUNG.CKO1 -0.076785714 -0.02419612    G1               9
## AAACCCACAGCAAGAC-1 LUNG.CTL1  0.006035437  0.01867770   G2M               2
##                         cnt sort_clusters   SP.info preAnno1 preAnno2  rep
## AAACCCAAGGTAAGGA-1 LUNG.CTL             2 LUNG.CTL2     NEU5      NEU rep2
## AAACCCACAAACGTGG-1 LUNG.CKO             0 LUNG.CKO2      NK2       NK rep2
## AAACCCACAACACAAA-1 LUNG.CTL             6 LUNG.CTL2     NEU2      NEU rep2
## AAACCCACAAGCTGCC-1 LUNG.CKO             6 LUNG.CKO1     NEU1      NEU rep1
## AAACCCACACAAACGG-1 LUNG.CKO             9 LUNG.CKO1     NEU4      NEU rep1
## AAACCCACAGCAAGAC-1 LUNG.CTL             2 LUNG.CTL1     NEU1      NEU rep1
##                    Anno1 Anno2   UMAP_1    UMAP_2 score.Cycling score.GO0016042
## AAACCCAAGGTAAGGA-1 NEU.2   NEU 7.838557 -5.716607  -0.027212426      0.04833179
## AAACCCACAAACGTGG-1  NK.2    NK 5.685989  5.990192  -0.003880877     -0.02466604
## AAACCCACAACACAAA-1 NEU.2   NEU 6.848635 -4.713001  -0.060071969     -0.01440387
## AAACCCACAAGCTGCC-1 NEU.2   NEU 8.051816 -3.495369  -0.054866279      0.01954630
## AAACCCACACAAACGG-1 NEU.3   NEU 4.011680 -6.262844  -0.022034372      0.02962145
## AAACCCACAGCAAGAC-1 NEU.2   NEU 9.316029 -4.598191   0.084965875     -0.03083213
##                    score.GO0006635 score.GO0034440 score.GO0019395
## AAACCCAAGGTAAGGA-1     0.070040630    0.0861632147     0.055366358
## AAACCCACAAACGTGG-1     0.010412014    0.0004346464     0.008735025
## AAACCCACAACACAAA-1    -0.022048284    0.0055741955    -0.017620802
## AAACCCACAAGCTGCC-1     0.007270215   -0.0072738336    -0.026223699
## AAACCCACACAAACGG-1    -0.016310473   -0.0173372555    -0.025515183
## AAACCCACAGCAAGAC-1    -0.037100032   -0.0231897769    -0.037745314
#saveRDS(GEX.seur,"./sc10x.LUNG_only.final_Anno.rds")